diff --git a/.clang-format b/.clang-format
new file mode 100644
index 0000000000..0506e524fa
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,178 @@
+---
+Language: Cpp
+# BasedOnStyle: LLVM
+AccessModifierOffset: -1
+AlignAfterOpenBracket: Align
+AlignArrayOfStructures: None
+AlignConsecutiveMacros: None
+AlignConsecutiveAssignments: None
+AlignConsecutiveBitFields: None
+AlignConsecutiveDeclarations: None
+AlignEscapedNewlines: Right
+AlignOperands: Align
+AlignTrailingComments: true
+AllowAllArgumentsOnNextLine: true
+AllowAllConstructorInitializersOnNextLine: true
+AllowAllParametersOfDeclarationOnNextLine: true
+AllowShortEnumsOnASingleLine: true
+AllowShortBlocksOnASingleLine: Never
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortFunctionsOnASingleLine: All
+AllowShortLambdasOnASingleLine: All
+AllowShortIfStatementsOnASingleLine: Never
+AllowShortLoopsOnASingleLine: false
+AlwaysBreakAfterDefinitionReturnType: None
+AlwaysBreakAfterReturnType: None
+AlwaysBreakBeforeMultilineStrings: false
+AlwaysBreakTemplateDeclarations: MultiLine
+AttributeMacros:
+  - __capability
+BinPackArguments: true
+BinPackParameters: true
+BraceWrapping:
+  AfterCaseLabel: false
+  AfterClass: false
+  AfterControlStatement: Never
+  AfterEnum: false
+  AfterFunction: false
+  AfterNamespace: false
+  AfterObjCDeclaration: false
+  AfterStruct: false
+  AfterUnion: false
+  AfterExternBlock: false
+  BeforeCatch: false
+  BeforeElse: false
+  BeforeLambdaBody: false
+  BeforeWhile: false
+  IndentBraces: false
+  SplitEmptyFunction: true
+  SplitEmptyRecord: true
+  SplitEmptyNamespace: true
+BreakBeforeBinaryOperators: None
+BreakBeforeConceptDeclarations: true
+BreakBeforeBraces: Attach
+BreakBeforeInheritanceComma: false
+BreakInheritanceList: BeforeColon
+BreakBeforeTernaryOperators: true
+BreakConstructorInitializersBeforeComma: false
+BreakConstructorInitializers: BeforeColon
+BreakAfterJavaFieldAnnotations: false
+BreakStringLiterals: true
+ColumnLimit: 80
+CommentPragmas: '^ IWYU pragma:'
+CompactNamespaces: false
+ConstructorInitializerAllOnOneLineOrOnePerLine: false
+ConstructorInitializerIndentWidth: 4
+ContinuationIndentWidth: 4
+Cpp11BracedListStyle: true
+DeriveLineEnding: true
+DerivePointerAlignment: false
+DisableFormat: false
+EmptyLineAfterAccessModifier: Never
+EmptyLineBeforeAccessModifier: LogicalBlock
+ExperimentalAutoDetectBinPacking: false
+FixNamespaceComments: true
+ForEachMacros:
+  - foreach
+  - Q_FOREACH
+  - BOOST_FOREACH
+IfMacros:
+  - KJ_IF_MAYBE
+IncludeBlocks: Preserve
+IncludeCategories:
+  - Regex: '^"(llvm|llvm-c|clang|clang-c)/'
+    Priority: 2
+    SortPriority: 0
+    CaseSensitive: false
+  - Regex: '^(<|"(gtest|gmock|isl|json)/)'
+    Priority: 3
+    SortPriority: 0
+    CaseSensitive: false
+  - Regex: '.*'
+    Priority: 1
+    SortPriority: 0
+    CaseSensitive: false
+IncludeIsMainRegex: '(Test)?$'
+IncludeIsMainSourceRegex: ''
+IndentAccessModifiers: false
+IndentCaseLabels: false
+IndentCaseBlocks: false
+IndentGotoLabels: true
+IndentPPDirectives: None
+IndentExternBlock: AfterExternBlock
+IndentRequires: false
+IndentWidth: 2
+IndentWrappedFunctionNames: false
+InsertTrailingCommas: None
+JavaScriptQuotes: Leave
+JavaScriptWrapImports: true
+KeepEmptyLinesAtTheStartOfBlocks: true
+LambdaBodyIndentation: Signature
+MacroBlockBegin: ''
+MacroBlockEnd: ''
+MaxEmptyLinesToKeep: 1
+NamespaceIndentation: None
+ObjCBinPackProtocolList: Auto
+ObjCBlockIndentWidth: 2
+ObjCBreakBeforeNestedBlockParam: true
+ObjCSpaceAfterProperty: false
+ObjCSpaceBeforeProtocolList: true
+PenaltyBreakAssignment: 2
+PenaltyBreakBeforeFirstCallParameter: 19
+PenaltyBreakComment: 300
+PenaltyBreakFirstLessLess: 120
+PenaltyBreakString: 1000
+PenaltyBreakTemplateDeclaration: 10
+PenaltyExcessCharacter: 1000000
+PenaltyReturnTypeOnItsOwnLine: 60
+PenaltyIndentedWhitespace: 0
+PointerAlignment: Left
+PPIndentWidth: -1
+ReferenceAlignment: Pointer
+ReflowComments: true
+ShortNamespaceLines: 1
+SortIncludes: CaseSensitive
+SortJavaStaticImport: Before
+SortUsingDeclarations: true
+SpaceAfterCStyleCast: false
+SpaceAfterLogicalNot: false
+SpaceAfterTemplateKeyword: true
+SpaceBeforeAssignmentOperators: true
+SpaceBeforeCaseColon: false
+SpaceBeforeCpp11BracedList: false
+SpaceBeforeCtorInitializerColon: true
+SpaceBeforeInheritanceColon: true
+SpaceBeforeParens: ControlStatements
+SpaceAroundPointerQualifiers: Default
+SpaceBeforeRangeBasedForLoopColon: true
+SpaceInEmptyBlock: false
+SpaceInEmptyParentheses: false
+SpacesBeforeTrailingComments: 1
+SpacesInAngles: Never
+SpacesInConditionalStatement: false
+SpacesInContainerLiterals: true
+SpacesInCStyleCastParentheses: false
+SpacesInLineCommentPrefix:
+  Minimum: 1
+  Maximum: -1
+SpacesInParentheses: false
+SpacesInSquareBrackets: false
+SpaceBeforeSquareBrackets: false
+BitFieldColonSpacing: Both
+Standard: Latest
+StatementAttributeLikeMacros:
+  - Q_EMIT
+StatementMacros:
+  - Q_UNUSED
+  - QT_REQUIRE_VERSION
+TabWidth: 8
+UseCRLF: false
+UseTab: Never
+WhitespaceSensitiveMacros:
+  - STRINGIZE
+  - PP_STRINGIZE
+  - BOOST_PP_STRINGIZE
+  - NS_SWIFT_NAME
+  - CF_SWIFT_NAME
+...
diff --git a/.clang_format.hook b/.clang_format.hook
new file mode 100644
index 0000000000..df453cbc74
--- /dev/null
+++ b/.clang_format.hook
@@ -0,0 +1,15 @@
+#!/bin/bash
+set -e
+
+readonly VERSION="3.8"
+
+version=$(clang-format -version)
+
+if ! [[ $version == *"$VERSION"* ]]; then
+  echo "clang-format version check failed."
+  echo "a version containing '$VERSION' is needed, but got '$version'"
+  echo "you can install the right version and make a soft link to it in your '\$PATH'"
+  exit -1
+fi
+
+clang-format -style=google $@
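The hook forwards its arguments to clang-format with the Google style after a version guard, so it can also be run by hand on a single file. A quick local sanity check might look like this (the file path is illustrative):

```bash
# Same invocation the pre-commit entry "bash .clang_format.hook -i" uses;
# the script aborts unless `clang-format -version` contains "3.8".
bash .clang_format.hook -i fastdeploy/fastdeploy_model.cc
```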
diff --git a/.cpplint_pre_commit.hook b/.cpplint_pre_commit.hook
new file mode 100644
index 0000000000..447b1ba134
--- /dev/null
+++ b/.cpplint_pre_commit.hook
@@ -0,0 +1,60 @@
+#!/bin/bash
+
+#TOTAL_ERRORS=0
+#
+#if [[ ! $TRAVIS_BRANCH ]]; then
+#  # install cpplint on local machine.
+#  if [[ ! $(which cpplint) ]]; then
+#    pip install cpplint
+#  fi
+#  # diff files on local machine.
+#  files=$(git diff --cached --name-status | awk '$1 != "D" {print $2}')
+#else
+#  # diff files between PR and latest commit on Travis CI.
+#  branch_ref=$(git rev-parse "$TRAVIS_BRANCH")
+#  head_ref=$(git rev-parse HEAD)
+#  files=$(git diff --name-status $branch_ref $head_ref | awk '$1 != "D" {print $2}')
+#fi
+## The trick to remove deleted files: https://stackoverflow.com/a/2413151
+#for file in $files; do
+#  echo $file
+#  if [[ $file =~ ^(patches/.*) ]]; then
+#    continue;
+#  else
+#    cpplint --filter=-readability/fn_size $file;
+#    TOTAL_ERRORS=$(expr $TOTAL_ERRORS + $?);
+#  fi
+#done
+#
+#exit $TOTAL_ERRORS
+
+if git rev-parse --verify HEAD >/dev/null 2>&1
+then
+  against=HEAD
+else
+  # Initial commit: diff against an empty tree object
+  against=4b825dc642cb6eb9a060e54bf8d69288fbee4904
+fi
+
+# Redirect output to stderr.
+exec 1>&2
+
+cpplint=cpplint
+sum=0
+filters='-build/include_order,-build/namespaces,-legal/copyright,-runtime/references,-build/include_what_you_use'
+
+# for cpp
+for file in $(git diff-index --name-status $against -- | grep -E '\.[ch](pp)?$' | awk '{print $2}'); do
+  $cpplint --filter=$filters $file
+  sum=$(expr ${sum} + $?)
+done
+
+if [ ${sum} -eq 0 ]; then
+  exit 0
+else
+  exit 1
+fi
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000000..b2602607a9
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+fastdeploy/libs/lib*
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000000..7a9bfa8ec9
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,43 @@
+repos:
+- repo: https://github.com/PaddlePaddle/mirrors-yapf.git
+  rev: 0d79c0c469bab64f7229c9aca2b1186ef47f0e37
+  hooks:
+  - id: yapf
+    files: \.py$
+- repo: https://github.com/pre-commit/pre-commit-hooks
+  rev: a11d9314b22d8f8c7556443875b731ef05965464
+  hooks:
+  - id: check-merge-conflict
+  - id: check-symlinks
+  - id: end-of-file-fixer
+  - id: trailing-whitespace
+  - id: detect-private-key
+  - id: check-added-large-files
+- repo: local
+  hooks:
+  - id: copyright_checker
+    name: copyright_checker
+    entry: python ./.copyright.hook
+    language: system
+    files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|py)$
+    exclude: (?!.*third_party)^.*$
+- repo: local
+  hooks:
+  - id: clang-format-with-version-check
+    name: clang-format
+    description: Format files with ClangFormat.
+    entry: bash .clang_format.hook -i
+    language: system
+    files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto)$
+- repo: local
+  hooks:
+  - id: cpplint-cpp-source
+    name: cpplint
+    description: Check C++ code style using cpplint.py.
+    entry: bash .cpplint_pre_commit.hook
+    language: system
+    files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx)$
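These hooks are driven by the pre-commit framework. A typical one-time setup on a developer machine (standard pre-commit CLI usage; presumably what the `commit-prepare.sh` script mentioned in the README wraps) is:

```bash
pip install pre-commit
pre-commit install          # register the hooks for this clone
pre-commit run --all-files  # optional: check the whole tree once
```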
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000000..7bf3a0f545
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,261 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+PROJECT(fastdeploy C CXX)
+CMAKE_MINIMUM_REQUIRED (VERSION 3.16)
+
+include(ExternalProject)
+add_subdirectory(fastdeploy)
+include(external/utils.cmake)
+
+# Set C++11 as the standard for the whole project
+if(NOT MSVC)
+  set(CMAKE_CXX_STANDARD 11)
+endif(NOT MSVC)
+
+#############################CMAKE FOR FASTDEPLOY################################
+option(ENABLE_PADDLE_FRONTEND "whether to enable the PaddlePaddle frontend so fastdeploy can load Paddle models." ON)
+option(WITH_GPU "if WITH_GPU=ON, onnxruntime-gpu/paddle-inference-gpu will be enabled" OFF)
+option(ENABLE_ORT_BACKEND "whether to enable the onnxruntime backend." OFF)
+option(ENABLE_TRT_BACKEND "whether to enable the tensorrt backend." OFF)
+option(CUDA_DIRECTORY "path of the cuda library, needed when building the tensorrt backend.")
+option(TRT_DIRECTORY "path of the tensorrt library, needed when building the tensorrt backend.")
+option(ENABLE_VISION "whether to enable vision models usage." OFF)
+option(ENABLE_VISION_VISUALIZE "whether to enable the toolbox that visualizes vision model results." ON)
+
+# Please don't enable this flag for now; some bugs exist.
+option(ENABLE_OPENCV_CUDA "whether to enable opencv with cuda; this allows processing images on the GPU." OFF)
+option(ENABLE_DEBUG "whether to print debug information; this may reduce performance." OFF)
+
+if(ENABLE_DEBUG)
+  add_definitions(-DFASTDEPLOY_DEBUG)
+endif()
+if(NOT CUDA_DIRECTORY)
+  set(CUDA_DIRECTORY "/usr/local/cuda")
+endif()
+
+option(BUILD_FASTDEPLOY_PYTHON "whether to build the python lib for fastdeploy." OFF)
+
+include_directories(${PROJECT_SOURCE_DIR})
+include_directories(${CMAKE_CURRENT_BINARY_DIR})
+
+add_definitions(-DFASTDEPLOY_LIB)
+file(GLOB_RECURSE ALL_DEPLOY_SRCS ${PROJECT_SOURCE_DIR}/fastdeploy/*.cc)
+file(GLOB_RECURSE DEPLOY_ORT_SRCS ${PROJECT_SOURCE_DIR}/fastdeploy/backends/ort/*.cc)
+file(GLOB_RECURSE DEPLOY_TRT_SRCS ${PROJECT_SOURCE_DIR}/fastdeploy/backends/tensorrt/*.cc ${PROJECT_SOURCE_DIR}/fastdeploy/backends/tensorrt/*.cpp)
+file(GLOB_RECURSE DEPLOY_VISION_SRCS ${PROJECT_SOURCE_DIR}/fastdeploy/vision/*.cc)
+file(GLOB_RECURSE DEPLOY_PYBIND_SRCS ${PROJECT_SOURCE_DIR}/fastdeploy/pybind/*.cc ${PROJECT_SOURCE_DIR}/fastdeploy/*_pybind.cc)
+list(REMOVE_ITEM ALL_DEPLOY_SRCS ${DEPLOY_ORT_SRCS} ${DEPLOY_TRT_SRCS} ${DEPLOY_VISION_SRCS})
+
+set(DEPEND_LIBS "")
+
+file(READ "${PROJECT_SOURCE_DIR}/VERSION_NUMBER" FASTDEPLOY_VERSION)
+string(STRIP "${FASTDEPLOY_VERSION}" FASTDEPLOY_VERSION)
+
+set(THIRD_PARTY_PATH ${CMAKE_CURRENT_BINARY_DIR}/third_libs)
+if(ENABLE_PADDLE_FRONTEND)
+  add_definitions(-DENABLE_PADDLE_FRONTEND)
+  include(${PROJECT_SOURCE_DIR}/external/paddle2onnx.cmake)
+  list(APPEND DEPEND_LIBS external_paddle2onnx)
+endif(ENABLE_PADDLE_FRONTEND)
+
+if(ENABLE_ORT_BACKEND)
+  add_definitions(-DENABLE_ORT_BACKEND)
+  list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_ORT_SRCS})
+  include(external/onnxruntime.cmake)
+  list(APPEND DEPEND_LIBS external_onnxruntime)
+endif()
+
+if(WITH_GPU)
+  if(APPLE)
+    message(FATAL_ERROR "Cannot enable GPU while compiling on macOS.")
+    set(WITH_GPU OFF)
+  else()
+    add_definitions(-DWITH_GPU)
+    include_directories(${CUDA_DIRECTORY}/include)
+    find_library(CUDA_LIB cudart ${CUDA_DIRECTORY}/lib64)
+    list(APPEND DEPEND_LIBS ${CUDA_LIB})
+  endif()
+endif()
+
+if(ENABLE_TRT_BACKEND)
+  if(APPLE)
+    message(FATAL_ERROR "Cannot enable the tensorrt backend on macOS, please set -DENABLE_TRT_BACKEND=OFF.")
+  endif()
+  if(NOT WITH_GPU)
+    message(FATAL_ERROR "-DENABLE_TRT_BACKEND=ON requires -DWITH_GPU=ON, but WITH_GPU is OFF.")
+  endif()
+  add_definitions(-DENABLE_TRT_BACKEND)
+  include_directories(${TRT_DIRECTORY}/include)
+  include_directories(${PROJECT_SOURCE_DIR}/fastdeploy/backends/tensorrt/common)
+  list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_TRT_SRCS})
+  find_library(TRT_INFER_LIB nvinfer ${TRT_DIRECTORY}/lib)
+  find_library(TRT_ONNX_LIB nvonnxparser ${TRT_DIRECTORY}/lib)
+  find_library(TRT_CAFFE_LIB nvcaffe_parser ${TRT_DIRECTORY}/lib)
+  find_library(TRT_PLUGIN_LIB nvinfer_plugin ${TRT_DIRECTORY}/lib)
+  list(APPEND DEPEND_LIBS ${TRT_INFER_LIB} ${TRT_ONNX_LIB} ${TRT_CAFFE_LIB} ${TRT_PLUGIN_LIB})
+
+  # copy tensorrt libraries to third lib
+  if(EXISTS "${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt")
+    file(REMOVE_RECURSE "${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt/lib")
+  endif()
+  file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt/lib")
+  file(COPY ${TRT_INFER_LIB} ${TRT_ONNX_LIB} ${TRT_CAFFE_LIB} ${TRT_PLUGIN_LIB} DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt/lib" FOLLOW_SYMLINK_CHAIN)
+endif()
+
+if(ENABLE_VISION)
+  add_definitions(-DENABLE_VISION)
+  if(ENABLE_OPENCV_CUDA)
+    add_definitions(-DENABLE_OPENCV_CUDA)
+    if(APPLE)
+      message(FATAL_ERROR "Cannot enable opencv with cuda on macOS, please set -DENABLE_OPENCV_CUDA=OFF.")
+    endif()
+  endif()
+  add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/yaml-cpp)
+  list(APPEND DEPEND_LIBS yaml-cpp)
+  list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_VISION_SRCS})
+  include_directories(${PROJECT_SOURCE_DIR}/third_party/yaml-cpp/include)
+  include(external/opencv.cmake)
+
+  if(ENABLE_VISION_VISUALIZE)
+    add_definitions(-DENABLE_VISION_VISUALIZE)
+  endif()
+endif()
+
+configure_file(${PROJECT_SOURCE_DIR}/fastdeploy/core/config.h.in ${PROJECT_SOURCE_DIR}/fastdeploy/core/config.h)
+configure_file(${PROJECT_SOURCE_DIR}/FastDeploy.cmake.in ${PROJECT_SOURCE_DIR}/FastDeploy.cmake @ONLY)
+
+list(REMOVE_ITEM ALL_DEPLOY_SRCS ${DEPLOY_PYBIND_SRCS})
+
+add_library(fastdeploy SHARED ${ALL_DEPLOY_SRCS})
+redefine_file_macro(fastdeploy)
+if(APPLE)
+# set_target_properties(fastdeploy PROPERTIES LINK_FLAGS "-undefined dynamic_lookup")
+  set_target_properties(fastdeploy PROPERTIES COMPILE_FLAGS "-fvisibility=hidden")
+elseif(MSVC)
+else()
+  set_target_properties(fastdeploy PROPERTIES COMPILE_FLAGS "-fvisibility=hidden")
+  set_target_properties(fastdeploy PROPERTIES LINK_FLAGS "-Wl,--exclude-libs,ALL")
+  set_target_properties(fastdeploy PROPERTIES LINK_FLAGS_RELEASE -s)
+endif()
+
+find_package(OpenMP)
+if(OpenMP_CXX_FOUND)
+  list(APPEND DEPEND_LIBS OpenMP::OpenMP_CXX)
+endif()
+set_target_properties(fastdeploy PROPERTIES VERSION ${FASTDEPLOY_VERSION})
+target_link_libraries(fastdeploy ${DEPEND_LIBS})
+
+include(external/summary.cmake)
+fastdeploy_summary()
+
+install(
+  TARGETS fastdeploy
+  LIBRARY DESTINATION lib
+)
+install(
+  DIRECTORY ${PROJECT_SOURCE_DIR}/fastdeploy
+  DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
+  FILES_MATCHING
+  PATTERN "*.h"
+  PATTERN "${PROJECT_SOURCE_DIR}/fastdeploy/backends/*/*.h"
+)
+install(
+  DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install
+  DESTINATION ${CMAKE_INSTALL_PREFIX}/third_libs
+)
+install(
+  FILES
+  ${PROJECT_SOURCE_DIR}/LICENSE
+  ${PROJECT_SOURCE_DIR}/ThirdPartyNotices.txt
+  ${PROJECT_SOURCE_DIR}/VERSION_NUMBER
+  ${PROJECT_SOURCE_DIR}/FastDeploy.cmake
+  DESTINATION ${CMAKE_INSTALL_PREFIX}
+)
+
+# Build demo cpp
+if(ENABLE_VISION)
+  add_executable(yolov5_exe ${PROJECT_SOURCE_DIR}/demo/cpp/vision/yolov5.cc)
+  target_link_libraries(yolov5_exe PUBLIC fastdeploy)
+endif()
+
+if(BUILD_FASTDEPLOY_PYTHON)
+  add_definitions(-DBUILD_FASTDEPLOY_PYTHON)
+  if("${PY_EXT_SUFFIX}" STREQUAL "")
+    if(MSVC)
+      set(PY_EXT_SUFFIX ".pyd")
+    else()
+      set(PY_EXT_SUFFIX ".so")
+    endif()
+  endif()
+
+  # find_package(Python ...) has replaced PythonInterp and PythonLibs since cmake 3.12
+  # Use the following command in the future; for now this is only compatible with the latest pybind11
+  # find_package(Python ${PY_VERSION} COMPONENTS Interpreter Development REQUIRED)
+  find_package(PythonInterp ${PY_VERSION} REQUIRED)
+  find_package(PythonLibs ${PY_VERSION})
+
+  if(CMAKE_SYSTEM_NAME STREQUAL "AIX")
+    set(CMAKE_NO_SYSTEM_FROM_IMPORTED 1)
+  endif()
+
+  add_library(fastdeploy_main MODULE ${DEPLOY_PYBIND_SRCS})
+  redefine_file_macro(fastdeploy_main)
+  set_target_properties(fastdeploy_main PROPERTIES PREFIX "")
+  set_target_properties(fastdeploy_main
+                        PROPERTIES COMPILE_FLAGS "-fvisibility=hidden")
+  set_target_properties(fastdeploy_main PROPERTIES SUFFIX ${PY_EXT_SUFFIX})
+  set_target_properties(fastdeploy_main
+                        PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
+  target_include_directories(fastdeploy_main PRIVATE
+                             $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>
+                             $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
+                             ${PYTHON_INCLUDE_DIR})
+
+  target_include_directories(fastdeploy_main PUBLIC ${PROJECT_SOURCE_DIR}/third_party/pybind11/include)
+
+  if(APPLE)
+    set_target_properties(fastdeploy_main
+                          PROPERTIES LINK_FLAGS "-undefined dynamic_lookup")
+  endif()
+
+  target_link_libraries(fastdeploy_main PUBLIC fastdeploy)
+
+  if(MSVC)
+    target_link_libraries(fastdeploy_main PRIVATE ${PYTHON_LIBRARIES})
+    target_compile_options(fastdeploy_main
+                           PRIVATE /MP
+                                   /wd4244 # 'argument': conversion from 'google::
+                                           # protobuf::uint64' to 'int', possible
+                                           # loss of data
+                                   /wd4267 # Conversion from 'size_t' to 'int',
+                                           # possible loss of data
+                                   /wd4996 # The second parameter is ignored.
+                                   ${EXTRA_FLAGS})
+    target_compile_options(fastdeploy_main PRIVATE $<$<NOT:$<CONFIG:Debug>>:/MT> $<$<CONFIG:Debug>:/MTd>)
+  endif()
+endif(BUILD_FASTDEPLOY_PYTHON)
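Taken together, the options above translate into configure invocations like the following sketch (paths are illustrative; the TensorRT directory mirrors the example given in FastDeploy.cmake.in's error message):

```bash
# CPU-only build with the ONNX Runtime backend and vision models enabled.
cmake -S . -B build -DENABLE_ORT_BACKEND=ON -DENABLE_VISION=ON

# GPU build with TensorRT; ENABLE_TRT_BACKEND=ON requires WITH_GPU=ON.
cmake -S . -B build \
      -DWITH_GPU=ON \
      -DENABLE_TRT_BACKEND=ON \
      -DCUDA_DIRECTORY=/usr/local/cuda \
      -DTRT_DIRECTORY=/usr/downloads/TensorRT-8.4.1.0
cmake --build build -j 8
```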
diff --git a/FastDeploy.cmake.in b/FastDeploy.cmake.in
new file mode 100644
index 0000000000..ef90cefc7e
--- /dev/null
+++ b/FastDeploy.cmake.in
@@ -0,0 +1,59 @@
+CMAKE_MINIMUM_REQUIRED (VERSION 3.16)
+
+set(WITH_GPU @WITH_GPU@)
+set(ENABLE_ORT_BACKEND @ENABLE_ORT_BACKEND@)
+set(ENABLE_TRT_BACKEND @ENABLE_TRT_BACKEND@)
+set(ENABLE_PADDLE_FRONTEND @ENABLE_PADDLE_FRONTEND@)
+set(ENABLE_VISION @ENABLE_VISION@)
+set(ENABLE_OPENCV_CUDA @ENABLE_OPENCV_CUDA@)
+
+set(FASTDEPLOY_LIBS "")
+set(FASTDEPLOY_INCS "")
+list(APPEND FASTDEPLOY_INCS ${CMAKE_CURRENT_LIST_DIR}/include)
+
+find_library(FDLIB fastdeploy ${CMAKE_CURRENT_LIST_DIR}/lib)
+list(APPEND FASTDEPLOY_LIBS ${FDLIB})
+if(ENABLE_ORT_BACKEND)
+  find_library(ORT_LIB onnxruntime ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/onnxruntime/lib)
+  list(APPEND FASTDEPLOY_LIBS ${ORT_LIB})
+endif()
+
+if(WITH_GPU)
+  if (NOT CUDA_DIRECTORY)
+    message(FATAL_ERROR "[FastDeploy] Please define CUDA_DIRECTORY, e.g. -DCUDA_DIRECTORY=/usr/local/cuda")
+  endif()
+  find_library(CUDA_LIB cudart ${CUDA_DIRECTORY}/lib64)
+  list(APPEND FASTDEPLOY_LIBS ${CUDA_LIB})
+
+  if (ENABLE_TRT_BACKEND)
+    if (NOT TRT_DIRECTORY)
+      message(FATAL_ERROR "[FastDeploy] Please define TRT_DIRECTORY, e.g. -DTRT_DIRECTORY=/usr/downloads/TensorRT-8.4.1.0")
+    endif()
+    find_library(TRT_INFER_LIB nvinfer ${TRT_DIRECTORY}/lib)
+    find_library(TRT_ONNX_LIB nvonnxparser ${TRT_DIRECTORY}/lib)
+    find_library(TRT_CAFFE_LIB nvcaffe_parser ${TRT_DIRECTORY}/lib)
+    find_library(TRT_PLUGIN_LIB nvinfer_plugin ${TRT_DIRECTORY}/lib)
+    list(APPEND FASTDEPLOY_LIBS ${TRT_INFER_LIB} ${TRT_ONNX_LIB} ${TRT_CAFFE_LIB} ${TRT_PLUGIN_LIB})
+  endif()
+endif()
+
+if(ENABLE_VISION)
+  find_library(OPENCV_CORE_LIB opencv_core ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/opencv/lib)
+  find_library(OPENCV_HIGHGUI_LIB opencv_highgui ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/opencv/lib)
+  find_library(OPENCV_IMGPROC_LIB opencv_imgproc ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/opencv/lib)
+  find_library(OPENCV_IMGCODECS_LIB opencv_imgcodecs ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/opencv/lib)
+  list(APPEND FASTDEPLOY_LIBS ${OPENCV_CORE_LIB} ${OPENCV_HIGHGUI_LIB} ${OPENCV_IMGPROC_LIB} ${OPENCV_IMGCODECS_LIB})
+  list(APPEND FASTDEPLOY_INCS ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/opencv/include)
+
+  if(ENABLE_OPENCV_CUDA)
+    find_library(OPENCV_CUDAARITHM_LIB opencv_cudaarithm ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/opencv/lib)
+    find_library(OPENCV_CUDAIMGPROC_LIB opencv_cudaimgproc ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/opencv/lib)
+    find_library(OPENCV_CUDAWARPING_LIB opencv_cudawarping ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/opencv/lib)
+    list(APPEND FASTDEPLOY_LIBS ${OPENCV_CUDAARITHM_LIB} ${OPENCV_CUDAIMGPROC_LIB} ${OPENCV_CUDAWARPING_LIB})
+  endif()
+endif()
+
+if(ENABLE_PADDLE_FRONTEND)
+  find_library(PADDLE2ONNX_LIB paddle2onnx ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/paddle2onnx/lib)
+  list(APPEND FASTDEPLOY_LIBS ${PADDLE2ONNX_LIB})
+endif()
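This file is meant to be pulled in with include() by a downstream project, which then reads the FASTDEPLOY_INCS and FASTDEPLOY_LIBS variables it fills. A minimal consumer sketch, assuming FastDeploy was installed to /opt/fastdeploy (the project name and main.cc are placeholders; run this in a fresh project directory):

```bash
# Write a minimal consumer CMakeLists.txt, then configure it against an
# installed FastDeploy tree; FASTDEPLOY_INSTALL_DIR is passed on the
# command line below and resolved inside CMake, not by the shell.
cat > CMakeLists.txt <<'EOF'
cmake_minimum_required(VERSION 3.16)
project(infer_demo C CXX)
include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
include_directories(${FASTDEPLOY_INCS})
add_executable(infer_demo main.cc)
target_link_libraries(infer_demo ${FASTDEPLOY_LIBS})
EOF
cmake -S . -B build -DFASTDEPLOY_INSTALL_DIR=/opt/fastdeploy
```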

diff --git a/README.md b/README.md
index 20a1f520a3..638f5cdcc8 100644
--- a/README.md
+++ b/README.md
@@ -1,202 +1,28 @@
-# ⚡️FastDeploy
+# FastDeploy
+
+For model inference, use FastDeploy!
+
+## Requirements
+- python >= 3.6
+- cmake >= 3.18
+- gcc >= 8.2
+- cuda >= 11.0 (required only if GPU support is enabled)
+- tensorrt >= 8.4 (required only if the TensorRT backend is enabled)
+
+## How to deploy models quickly with FastDeploy
+- [C++ deployment guide](docs/cpp/README.md)
+- [Python deployment guide](docs/python/README.md)
+
+## How to build FastDeploy yourself
+- [FastDeploy build guide](docs/compile/README.md)
+
+## Committing code
+Before submitting code, initialize the code environment once: after cloning the repository, run
+```
+sh commit-prepare.sh
+```
+Code format checks will then run automatically on every commit.
-
-Features | Installation | Quick Start | Community
-
-**⚡️FastDeploy** is an **easy-to-use** inference deployment toolbox. It covers the industry's mainstream **high-quality pre-trained models** and provides an **out-of-the-box** development experience across tasks such as image classification, object detection, image segmentation, face detection, human keypoint detection, and text recognition, meeting developers' needs for rapid deployment across **multiple scenarios**, **multiple kinds of hardware**, and **multiple platforms**.
-
-## News 📢
-
-* 🔥 2022.6.30, 20:30: ⚡️FastDeploy angel-user beta meetup to discuss inference deployment pain points with developers; scan the QR code to register, join the group, and get the meeting link.
-
-* 🔥 2022.6.27 [**⚡️FastDeploy v0.1.0**](https://github.com/PaddlePaddle/FastDeploy/releases/tag/release%2F0.1.0) beta released! 🎉
-  * 💎 Released SDKs covering 40 key models across 8 key software/hardware environments
-  * 😊 Available both as web-page downloads and as a pip package
-
-## Features
-
-### 📦 **Out-of-the-box deployment toolchain covering cloud, edge, and device, multiple kinds of hardware, and multiple platforms**
-- Point-and-click downloads on the web page, or a one-line pip install, to quickly fetch SDK packages of many types
-- Cloud (servers and data centers):
-  - Start a Serving service with a single command (including a graphical web view)
-  - Start prediction on images, local video streams, local cameras, and network video streams with a single command
-  - Supports Windows and Linux
-  - Supports the Python and C++ programming languages
-- Edge:
-  - Supports edge devices such as NVIDIA Jetson, including video-stream prediction services
-- Device side (including mobile):
-  - Supports iOS and Android
-  - Supports ARM CPU devices
-- Supports mainstream hardware:
-  - Intel CPUs (Core, Xeon, etc.)
-  - the full ARM CPU range (Qualcomm, MTK, RK, etc.)
-  - the full NVIDIA GPU range (V100, T4, Jetson, etc.)
-
-### 🤗 **Rich pre-trained models: download an SDK and inference deployment is done**
-
-| Model | Task | Size (MB) | Device | Mobile | Mobile | Edge | Server+Cloud | Server+Cloud | Server+Cloud | Server+Cloud |
-|---|---|---|---|---|---|---|---|---|---|---|
-|----- | ---- |----- | Linux | Android | iOS | Linux | Linux | Linux | Windows | Windows |
-|----- | ---- |--- | ARM CPU | ARM CPU | ARM CPU | Jetson | X86 CPU | GPU | X86 CPU | GPU |
-| [PP-LCNet](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.3/docs/zh_CN/models_training/classification.md) |Classification | 11.9 |✅|✅|✅|✅|✅|✅|✅|✅|
-| [PP-LCNetv2](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.3/docs/zh_CN/models_training/classification.md) |Classification | 26.6 |✅|✅|✅|✅|✅|✅|✅|✅|
-| [EfficientNet](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.3/docs/zh_CN/models_training/classification.md) |Classification |31.4 |✅|✅|✅|✅|✅|✅|✅|✅|
-| [GhostNet](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.3/docs/zh_CN/models_training/classification.md) |Classification | 20.8 |✅|✅|✅|✅|✅|✅|✅|✅|
-| [MobileNetV1](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.3/docs/zh_CN/models_training/classification.md) |Classification | 17 |✅|✅|✅|✅|✅|✅|✅|✅|
-| [MobileNetV2](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.3/docs/zh_CN/models_training/classification.md) |Classification | 14.2 |✅|✅|✅|✅|✅|✅|✅|✅|
-| [MobileNetV3](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.3/docs/zh_CN/models_training/classification.md) |Classification | 22 |✅|✅|✅|✅|✅|✅|✅|✅|
-| [ShuffleNetV2](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.3/docs/zh_CN/models_training/classification.md)|Classification | 9.2 |✅|✅|✅|✅|✅|✅|✅|✅|
-| [SqueezeNetV1.1](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.3/docs/zh_CN/models_training/classification.md) |Classification |5 |✅|✅|✅|✅|✅|✅|✅|✅|
-| [Inceptionv3](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.3/docs/zh_CN/models_training/classification.md) |Classification |95.5 |✅|✅|✅|✅|✅|✅|✅|✅|
-| [PP-HGNet](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.3/docs/zh_CN/models_training/classification.md) |Classification | 59 |✅|✅|✅|✅|✅|✅|✅|✅|
-| [ResNet50_vd](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.3/docs/zh_CN/models_training/classification.md) |Classification | 102.5 |❌|❌|❌|✅|✅|✅|✅|✅|
-| [SwinTransformer_224_win7](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.3/docs/zh_CN/models_training/classification.md) |Classification | 352.7 |✅|✅|✅|✅|✅|✅|✅|✅|
-| [PP-PicoDet_s_320_coco](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/docs/tutorials/GETTING_STARTED_cn.md) |Detection | 4.1 |✅|✅|✅|✅|✅|✅|✅|✅|
-| [PP-PicoDet_s_320_lcnet](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/docs/tutorials/GETTING_STARTED_cn.md) |Detection | 4.9 |✅|✅|✅|✅|✅|✅|✅|✅|
-| [CenterNet](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/docs/tutorials/GETTING_STARTED_cn.md) |Detection |4.8 |✅|✅|✅|✅|✅|✅|✅|✅|
-| [YOLOv3_MobileNetV3](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/docs/tutorials/GETTING_STARTED_cn.md) |Detection | 94.6 |✅|✅|✅|✅|✅|✅|✅|✅|
-| [PP-YOLO_tiny_650e_coco](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/docs/tutorials/GETTING_STARTED_cn.md) |Detection |4.4 |✅|✅|✅|✅|✅|✅|✅|✅|
-| [SSD_MobileNetV1_300_120e_voc](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/docs/tutorials/GETTING_STARTED_cn.md) |Detection | 23.3 |✅|✅|✅|✅|✅|✅|✅|✅|
-| [YOLOX_Nano_300e_coco](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/docs/tutorials/GETTING_STARTED_cn.md) |Detection | 3.7 |❌|❌|❌|✅|✅|✅|✅|✅|
-| [PP-YOLO_ResNet50vd](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/docs/tutorials/GETTING_STARTED_cn.md) |Detection | 188.5|✅|✅|✅|✅|✅|✅|✅|✅|
-| [PP-YOLOv2_ResNet50vd](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/docs/tutorials/GETTING_STARTED_cn.md) |Detection | 218.7 |✅|✅|✅|✅|✅|✅|✅|✅|
-| [PP-YOLO_crn_l_300e_coco](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/docs/tutorials/GETTING_STARTED_cn.md) |Detection | 209.1 |✅|✅|✅|✅|✅|✅|✅|✅|
-| [YOLOv5s](https://github.com/ultralytics/yolov5) |Detection | 29.3|✅|✅|✅|✅|✅|✅|✅|✅|
-| [Faster R-CNN_r50_fpn_1x_coco](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/docs/tutorials/GETTING_STARTED_cn.md) |Detection | 167.2 |❌|❌|❌|✅|✅|✅|✅|✅|
-| [BlazeFace](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/docs/tutorials/GETTING_STARTED_cn.md) |Face Detection |1.5|✅|✅|✅|✅|✅|✅|✅|✅|
-| [RetinaFace](https://github.com/biubug6/Pytorch_Retinaface) |Face Localisation |1.7|✅|❌|❌|✅|✅|✅|✅|✅|
-| [PP-TinyPose](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/docs/tutorials/GETTING_STARTED_cn.md) |Keypoint Detection| 5.5 |✅|✅|✅|✅|✅|✅|✅|✅|
-| [PP-LiteSeg(STDC1)](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/configs/pp_liteseg/README.md)|Segmentation | 32.2|✅|✅|✅|✅|✅|✅|✅|✅|
-| [PP-HumanSeg-Lite](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/contrib/PP-HumanSeg/README_cn.md) |Segmentation | 0.556|✅|✅|✅|✅|✅|✅|✅|✅|
-| [HRNet-w18](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/docs/train/train_cn.md) |Segmentation | 38.7|✅|✅|✅|❌|✅|✅|✅|✅|
-| [Mask R-CNN_r50_fpn_1x_coco](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/contrib/PP-HumanSeg/README_cn.md)|Segmentation| 107.2|❌|❌|❌|✅|✅|✅|✅|✅|
-| [PP-HumanSeg-Server](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/contrib/PP-HumanSeg/README_cn.md)|Segmentation | 107.2|✅|✅|✅|✅|✅|✅|✅|✅|
-| [Unet](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/docs/train/train_cn.md) |Segmentation | 53.7|❌|✅|❌|❌|✅|✅|✅|❌|
-| [Deeplabv3-ResNet50](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/docs/train/train_cn.md)|Segmentation |156.5|❌|❌|❌|❌|✅|✅|✅|✅|
-| [PP-OCRv1](https://github.com/PaddlePaddle/PaddleOCR/blob/release%2F2.5/doc/doc_ch/ppocr_introduction.md) |OCR | 2.3+4.4 |✅|✅|✅|✅|✅|✅|✅|✅|
-| [PP-OCRv2](https://github.com/PaddlePaddle/PaddleOCR/blob/release%2F2.5/doc/doc_ch/ppocr_introduction.md) |OCR | 2.3+4.4 |✅|✅|✅|✅|✅|✅|✅|✅|
-| [PP-OCRv3](https://github.com/PaddlePaddle/PaddleOCR/blob/release%2F2.5/doc/doc_ch/PP-OCRv3_introduction.md) |OCR | 2.4+10.6 |✅|✅|✅|✅|✅|✅|✅|✅|
-| [PP-OCRv3-tiny](https://github.com/PaddlePaddle/PaddleOCR/blob/release%2F2.5/doc/doc_ch/models_list.md) |OCR |2.4+10.7 |✅|✅|✅|✅|✅|✅|✅|✅|
-
-## SDK installation
-
-### Option 1: download from the web page
-
-- Log in to the [EasyEdge web page](https://ai.baidu.com/easyedge/app/openSource) and download the SDK
-
-### Option 2: pip install
-
-Developers can install `fastdeploy-python` via pip to get the latest download links
-
-- Requirements
-  - python >= 3.6
-
-- Installation
-```
-pip install fastdeploy-python --upgrade
-```
-
-- Usage
-
-  - list all models FastDeploy currently supports
-  ```
-  fastdeploy --list_models
-  ```
-  - download the deployment SDK and examples for a model on a given platform and hardware
-  ```
-  fastdeploy --download_sdk \
-             --model PP-PicoDet-s_320 \
-             --platform Linux \
-             --soc x86 \
-             --save_dir .
-  ```
-
-  - Parameters
-    - `list_models`: list all models FastDeploy currently supports
-    - `download_sdk`: download the deployment SDK and examples for a model on a given platform and hardware
-    - `model`: model name, e.g. "PP-PicoDet-s_320"; see `list_models` for all options
-    - `platform`: deployment platform, one of Windows/Linux/Android/iOS
-    - `soc`: deployment hardware, one of x86/x86-NVIDIA-GPU/ARM/Jetson
-    - `save_dir`: directory in which to save the downloaded SDK
-
-## SDK usage
-### 1 Cloud + server deployment
-  - Linux (X86 CPU, NVIDIA GPU)
-    - [C++ inference deployment (incl. video streams)](./docs/Linux-CPP-SDK-Inference.md)
-    - [C++ serving deployment](./docs/Linux-CPP-SDK-Serving.md)
-    - [Python inference deployment](./docs/Linux-Python-SDK-Inference.md)
-    - [Python serving deployment](./docs/Linux-Python-SDK-Serving.md)
-  - Windows (X86 CPU, NVIDIA GPU)
-    - [C++ inference deployment (incl. video streams)](./docs/Windows-CPP-SDK-Inference.md)
-    - [C++ serving deployment](./docs/Windows-CPP-SDK-Serving.md)
-    - [Python inference deployment](./docs/Windows-Python-SDK-Inference.md)
-    - [Python serving deployment](./docs/Windows-Python-SDK-Serving.md)
-
-### 2 Edge deployment
-  - ArmLinux (NVIDIA Jetson Nano/TX2/Xavier)
-    - [C++ inference deployment (incl. video streams)](./docs/Jetson-Linux-CPP-SDK-Inference.md)
-    - [C++ serving deployment](./docs/Jetson-Linux-CPP-SDK-Serving.md)
-
-### 3 On-device deployment
-  - ArmLinux (ARM CPU)
-    - [C++ inference deployment (incl. video streams)](./docs/ARM-Linux-CPP-SDK-Inference.md)
-    - [C++ serving deployment](./docs/ARM-Linux-CPP-SDK-Serving.md)
-    - [Python inference deployment](./docs/ARM-Linux-Python-SDK-Inference.md)
-    - [Python serving deployment](./docs/ARM-Linux-Python-SDK-Serving.md)
-
-### 4 Mobile deployment
-  - [iOS deployment](./docs/iOS-SDK.md)
-  - [Android deployment](./docs/Android-SDK.md)
-
-### 5 Custom model deployment
-  - [Quickly replace a model with your own](./docs/Replace-Model-With-Anther-One.md)
-
-## Community
-
-  - **Join the community 👬:** scan the QR code on WeChat and fill in the questionnaire to join the discussion group and talk over inference deployment pain points with developers
-
-## Acknowledge
-
-SDK generation and download in this project use the free, open capabilities of [EasyEdge](https://ai.baidu.com/easyedge/app/openSource); thanks again to that project.
-
-## License
-
-FastDeploy is released under the [Apache-2.0 open-source license](./LICENSE).
diff --git a/ThirdPartyNotices.txt b/ThirdPartyNotices.txt
new file mode 100644
index 0000000000..5842b9a717
--- /dev/null
+++ b/ThirdPartyNotices.txt
@@ -0,0 +1,734 @@
+This project depends on some open source projects, listed below
+
+--------
+1. https://github.com/protocolbuffers/protobuf
+
+Copyright 2008 Google Inc. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+    * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+    * Neither the name of Google Inc. nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+Code generated by the Protocol Buffer compiler is owned by the owner
+of the input file used when generating it. This code is not
+standalone and requires a support library to be linked with it. This
+support library is itself covered by the above license.
+
+--------
+2. https://github.com/onnx/onnx
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!) The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+--------
+3. https://github.com/microsoft/onnxruntime
+
+MIT License
+
+Copyright (c) Microsoft Corporation
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+--------
+4. https://github.com/pybind/pybind11
+
+Copyright (c) 2016 Wenzel Jakob, All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its contributors
+   may be used to endorse or promote products derived from this software
+   without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+Please also refer to the file .github/CONTRIBUTING.md, which clarifies licensing of
+external contributions to this project including patches, pull requests, etc.
+
+--------
+5. https://github.com/onnx/onnx-tensorrt
+
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!) The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright 2021 NVIDIA Corporation
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+--------
+6. https://github.com/opencv/opencv
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+ + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +-------- +6. https://github.com/jbeder/yaml-cpp + +Copyright (c) 2008-2015 Jesse Beder. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/VERSION_NUMBER b/VERSION_NUMBER new file mode 100644 index 0000000000..0d91a54c7d --- /dev/null +++ b/VERSION_NUMBER @@ -0,0 +1 @@ +0.3.0 diff --git a/commit-prepare.sh b/commit-prepare.sh index faa217e8f4..19626baaac 100644 --- a/commit-prepare.sh +++ b/commit-prepare.sh @@ -3,4 +3,5 @@ cd $path pip install pre-commit pip install yapf +pip install cpplint pre-commit install diff --git a/demo/cpp/vision/yolov5.cc b/demo/cpp/vision/yolov5.cc new file mode 100644 index 0000000000..0fabe77ba1 --- /dev/null +++ b/demo/cpp/vision/yolov5.cc @@ -0,0 +1,49 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/vision.h" + +int main() { + auto model = fastdeploy::vision::ultralytics::YOLOv5("yolov5s.onnx"); + model.EnableDebug(); + if (!model.Initialized()) { + std::cout << "Init Failed." << std::endl; + return -1; + } + cv::Mat im = cv::imread("bus.jpg"); + + for (size_t i = 0; i < 10; ++i) { + auto im1 = im.clone(); + fastdeploy::vision::DetectionResult res; + if (!model.Predict(&im1, &res)) { + std::cout << "Predict Failed." << std::endl; + return -1; + } + } + + { + fastdeploy::vision::DetectionResult res; + auto vis_im = im.clone(); + if (!model.Predict(&im, &res)) { + std::cout << "Predict Failed." 
<< std::endl; + return -1; + } + + fastdeploy::vision::Visualize::VisDetection(&vis_im, res); + cv::imwrite("vis.jpg", vis_im); + // Print Detection Result + std::cout << res.Str() << std::endl; + } + return 0; +} diff --git a/demo/python/vision/yolov5.py b/demo/python/vision/yolov5.py new file mode 100644 index 0000000000..d01df98b8a --- /dev/null +++ b/demo/python/vision/yolov5.py @@ -0,0 +1,10 @@ +import fastdeploy as fd +import cv2 + +# 获取模型 和 测试图片 +# wget https://github.com/ultralytics/yolov5/releases/download/v6.0/yolov5s.onnx +# wget https://raw.githubusercontent.com/ultralytics/yolov5/master/data/images/bus.jpg +model = fd.vision.ultralytics.YOLOv5("yolov5s.onnx") +im = cv2.imread("bus.jpg") +result = model.predict(im, conf_threshold=0.25, nms_iou_threshold=0.5) +print(result) diff --git a/docs/ARM-Linux-CPP-SDK-Inference.md b/docs/ARM-Linux-CPP-SDK-Inference.md deleted file mode 100644 index bcd4128e5d..0000000000 --- a/docs/ARM-Linux-CPP-SDK-Inference.md +++ /dev/null @@ -1,404 +0,0 @@ -# 简介 - -本文档介绍FastDeploy中的模型SDK,在ARM Linux C++环境下 : (1)推理部署步骤; (2)介绍模型推流全流程API,方便开发者了解项目后二次开发。 -其中ARM Linux Python请参考[ARM Linux Python环境下的推理部署](./ARM-Linux-Python-SDK-Inference.md)文档。 - -**注意**:部分模型(如Tinypose、OCR等)仅支持图像推理,不支持视频推理。 - - - -* [简介](#简介) - -* [环境准备](#环境准备) - - * [1. 硬件支持](#1-硬件支持) - * [2. 软件环境](#2-软件环境) - -* [快速开始](#快速开始) - - * [1. 项目结构说明](#1-项目结构说明) - * [2. 测试Demo](#2-测试demo) - * [2.1 预测图像](#21-预测图像) - * [2.2 预测视频流](#22-预测视频流) - -* [预测API流程详解](#预测api流程详解) - - * [1. SDK参数运行配置](#1-sdk参数运行配置) - * [2. 初始化Predictor](#2-初始化predictor) - * [3. 预测推理](#3-预测推理) - * [3.1 预测图像](#31-预测图像) - * [3.2 预测视频](#32-预测视频) - -* [FAQ](#faq) - - - -# 环境准备 - -## 1. 硬件支持 - -目前支持的ARM架构:aarch64 、armv7hf - -## 2. 软件环境 - -1.运行二进制文件-环境要求 - -* gcc: 5.4 以上 (GLIBCXX_3.4.22) - * Linux下查看gcc版本命名(可能因系统差异命令会不同):`gcc --version` - * Linux下C++基础库GLIBCXX的命令(因系统差异,库路径会有不同):`strings /usr/lib64/libstdc++.so.6 | grep GLIBCXX` -* glibc:2.23以上 - * Linux查看命令:`ldd --version` - -2.二次开发编译-环境要求 - -编译源代码时,除gcc、GLIBCXX、glibc满足`1.运行二进制文件-环境要求`外,cmake需满足: - -* cmake: 3.0 以上 - - * Linux查看命令:`cmake --version` - -# 快速开始 - -## 1. 项目结构说明 - -根据开发者模型、部署芯片、操作系统需要,在图像界面[飞桨开源模型](https://ai.baidu.com/easyedge/app/openSource)或[GIthub](https://github.com/PaddlePaddle/FastDeploy)中选择对应的SDK进行下载。SDK目录结构如下: - -``` -.EasyEdge-Linux-m43157-b97741-x86 -├── RES                 # 模型资源文件夹,一套模型适配不同硬件、OS和部署方式 -│ ├── conf.json        # Android、iOS系统APP名字需要 -│ ├── model # 模型结构文件 -│ ├── params # 模型参数文件 -│ ├── label_list.txt # 模型标签文件 -│ ├── infer_cfg.json # 模型前后处理等配置文件 -├── ReadMe.txt -├── cpp # C++ SDK 文件结构 - └── baidu_easyedge_ocr_linux_cpp_aarch64_ARM_gcc5.4_v1.5.1_20220530.tar.gz #armv8架构硬件的C++包,根据自己硬件,选择对应的压缩包解压即可 - ├── ReadMe.txt - ├── bin # 可直接运行的二进制文件 - ├── include # 二次开发用的头文件 - ├── lib # 二次开发用的所依赖的库 - ├── src # 二次开发用的示例工程 - └── thirdparty # 第三方依赖 - └── baidu_easyedge_ocr_linux_cpp_armv7l_armv7hf_ARM_gcc5.4_v1.5.1_20220530.tar.gz #armv7架构硬件的C++包,根据自己硬件,选择对应的压缩包解压即可 -└── python # Python SDK 文件 -``` - -**注意**: - -1. 【OCR需要编译】因为OCR任务的特殊性,本次SDK没有提供bin文件夹可执行文件。开发者根据需要,满足文档中gcc和cmake要求后,在`src/demo*`路径编译获取可执行文件,具体可参考。 -2. 【OCR仅支持图像推理,不支持视频流推理】 -3. ARM-Linux-Python的环境要求和使用,请参考[ARM Linux Python环境下的推理部署](./ARM-Linux-Python-SDK.md)文档。 - -## 2. 测试Demo - -> 模型资源文件(即压缩包中的RES文件夹)默认已经打包在开发者下载的SDK包中,请先将tar包整体拷贝到具体运行的设备中,再解压缩使用。 - -SDK中已经包含预先编译的二进制,可直接运行。以下运行示例均是`cd cpp/bin`路径下执行的结果。 - -### 2.1 预测图像 - -```bash -./easyedge_image_inference {模型RES文件夹路径} {测试图片路径} -``` - -运行效果示例: - -
- -```bash - > ./easyedge_image_inference ../../../../RES 2.jpeg -2019-02-13 16:46:12,659 INFO [EasyEdge] [easyedge.cpp:34] 140606189016192 Baidu EasyEdge Linux Development Kit 0.2.1(20190213) -2019-02-13 16:46:14,083 INFO [EasyEdge] [paddlev2_edge_predictor.cpp:60] 140606189016192 Allocate graph success. -2019-02-13 16:46:14,326 DEBUG [EasyEdge] [paddlev2_edge_predictor.cpp:143] 140606189016192 Inference costs 168 ms -1, 1:txt_frame, p:0.994905 loc: 0.168161, 0.153654, 0.920856, 0.779621 -Done -``` - -### 2.2 预测视频流 - -``` -./easyedge_video_inference {模型RES文件夹路径} {video_type} {video_src_path} -``` - -其中 video_type 支持三种: - -``` - video_type : 1 // 本地视频文件 - video_type : 2 // 摄像头的index - video_type : 3 // 网络视频流 -``` - -video_src_path: 为 video_type 数值所对应的本地视频路径 、本地摄像头id、网络视频流地址,如: - -``` - 本地视频文件: ./easyedge_video_inference {模型RES文件夹路径} 1 ~/my_video_file.mp4 - 本地摄像头: ./easyedge_video_inference {模型RES文件夹路径} 2 1 #/dev/video1 - 网络视频流: ./easyedge_video_inference {模型RES文件夹路径} 3 rtmp://192.168.x.x:8733/live/src -``` - -注:以上路径是假模拟路径,开发者需要根据自己实际图像/视频,准备测试图像,并填写正确的测试路径。 - -# 预测API流程详解 - -本章节主要结合[2.测试Demo](#4)的Demo示例介绍推理API,方便开发者学习后二次开发。更详细的API请参考`include/easyedge/easyedge*.h`文件。图像、视频的推理包含以下3个API,如下代码片段`step`注释所示。 - -> ❗注意:
-> (1)`src`文件夹中包含完整可编译的cmake工程实例,建议开发者先行了解[cmake工程基本知识](https://cmake.org/cmake/help/latest/guide/tutorial/index.html)。
-> (2)请优先参考SDK中自带的Demo工程的使用流程和说明。遇到错误,请优先参考文件中的注释、解释、日志说明。 - -```cpp - // step 1: SDK配置运行参数 - EdgePredictorConfig config; - config.model_dir = {模型文件目录}; - - // step 2: 创建并初始化Predictor;这这里选择合适的引擎 - auto predictor = global_controller()->CreateEdgePredictor(config); - - // step 3-1: 预测图像 - auto img = cv::imread({图片路径}); - std::vector results; - predictor->infer(img, results); - - // step 3-2: 预测视频 - std::vector results; - FrameTensor frame_tensor; - VideoConfig video_config; - video_config.source_type = static_cast(video_type); // source_type 定义参考头文件 easyedge_video.h - video_config.source_value = video_src; - /* - ... more video_configs, 根据需要配置video_config的各选项 - */ - auto video_decoding = CreateVideoDecoding(video_config); - while (video_decoding->next(frame_tensor) == EDGE_OK) { - results.clear(); - if (frame_tensor.is_needed) { - predictor->infer(frame_tensor.frame, results); - render(frame_tensor.frame, results, predictor->model_info().kind); - } - //video_decoding->display(frame_tensor); // 显示当前frame,需在video_config中开启配置 - //video_decoding->save(frame_tensor); // 存储当前frame到视频,需在video_config中开启配置 - } -``` - -若需自定义library search path或者gcc路径,修改对应Demo工程下的CMakeList.txt即可。 - -## 1. SDK参数运行配置 - -SDK的参数通过`EdgePredictorConfig::set_config`和`global_controller()->set_config`配置。本Demo 中设置了模型路径,其他参数保留默认参数。更详细的支持运行参数等,可以参考开发工具包中的头文件(`include/easyedge/easyedge_xxxx_config.h`)的详细说明。 - -配置参数使用方法如下: - -``` -EdgePredictorConfig config; -config.model_dir = {模型文件目录}; -``` - -## 2. 初始化Predictor - -* 接口 - - ```cpp - auto predictor = global_controller()->CreateEdgePredictor(config); - predictor->init(); - ``` - -若返回非0,请查看输出日志排查错误原因。 - -## 3. 预测推理 - -### 3.1 预测图像 - -> 在Demo中展示了预测接口infer()传入cv::Mat& image图像内容,并将推理结果赋值给std::vector& result。更多关于infer()的使用,可以根据参考`easyedge.h`头文件中的实际情况、参数说明自行传入需要的内容做推理 - -* 接口输入 - -```cpp - /** - * @brief - * 通用接口 - * @param image: must be BGR , HWC format (opencv default) - * @param result - * @return - */ - virtual int infer(cv::Mat& image, std::vector& result) = 0; -``` - - 图片的格式务必为opencv默认的BGR, HWC格式。 - -* 接口返回 - - `EdgeResultData`中可以获取对应的分类信息、位置信息。 - -```cpp -struct EdgeResultData { - int index; // 分类结果的index - std::string label; // 分类结果的label - float prob; // 置信度 - - // 物体检测 或 图像分割时使用: - float x1, y1, x2, y2; // (x1, y1): 左上角, (x2, y2): 右下角; 均为0~1的长宽比例值。 - - // 图像分割时使用: - cv::Mat mask; // 0, 1 的mask - std::string mask_rle; // Run Length Encoding,游程编码的mask -}; -``` - -*** 关于矩形坐标 *** - -x1 * 图片宽度 = 检测框的左上角的横坐标 - -y1 * 图片高度 = 检测框的左上角的纵坐标 - -x2 * 图片宽度 = 检测框的右下角的横坐标 - -y2 * 图片高度 = 检测框的右下角的纵坐标 - -*** 关于图像分割mask *** - -``` -cv::Mat mask为图像掩码的二维数组 -{ - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 1, 1, 1, 0, 0, 0, 0}, - {0, 0, 0, 1, 1, 1, 0, 0, 0, 0}, - {0, 0, 0, 1, 1, 1, 0, 0, 0, 0}, - {0, 0, 0, 1, 1, 1, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, -} -其中1代表为目标区域,0代表非目标区域 -``` - -*** 关于图像分割mask_rle *** - -该字段返回了mask的游程编码,解析方式可参考 [http demo](https://github.com/Baidu-AIP/EasyDL-Segmentation-Demo) - -以上字段可以参考demo文件中使用opencv绘制的逻辑进行解析 - -### 3.2 预测视频 - -SDK 提供了支持摄像头读取、视频文件和网络视频流的解析工具类`VideoDecoding`,此类提供了获取视频帧数据的便利函数。通过`VideoConfig`结构体可以控制视频/摄像头的解析策略、抽帧策略、分辨率调整、结果视频存储等功能。对于抽取到的视频帧可以直接作为SDK infer 接口的参数进行预测。 - -* 接口输入 - -class`VideoDecoding`: - -``` - /** - * @brief 获取输入源的下一帧 - * @param frame_tensor - * @return - */ - virtual int next(FrameTensor &frame_tensor) = 0; - - /** - * @brief 显示当前frame_tensor中的视频帧 - * @param frame_tensor - * @return - */ - virtual int display(const FrameTensor &frame_tensor) = 0; - - /** - * @brief 将当前frame_tensor中的视频帧写为本地视频文件 - * @param frame_tensor - * @return - */ - 
virtual int save(FrameTensor &frame_tensor) = 0; - - /** - * @brief 获取视频的fps属性 - * @return - */ - virtual int get_fps() = 0; - /** - * @brief 获取视频的width属性 - * @return - */ - virtual int get_width() = 0; - - /** - * @brief 获取视频的height属性 - * @return - */ - virtual int get_height() = 0; -``` - -struct `VideoConfig` - -``` -/** - * @brief 视频源、抽帧策略、存储策略的设置选项 - */ -struct VideoConfig { - SourceType source_type; // 输入源类型 - std::string source_value; // 输入源地址,如视频文件路径、摄像头index、网络流地址 - int skip_frames{0}; // 设置跳帧,每隔skip_frames帧抽取一帧,并把该抽取帧的is_needed置为true - int retrieve_all{false}; // 是否抽取所有frame以便于作为显示和存储,对于不满足skip_frames策略的frame,把所抽取帧的is_needed置为false - int input_fps{0}; // 在采取抽帧之前设置视频的fps - Resolution resolution{Resolution::kAuto}; // 采样分辨率,只对camera有效 - - bool enable_display{false}; // 默认不支持。 - std::string window_name{"EasyEdge"}; - bool display_all{false}; // 是否显示所有frame,若为false,仅显示根据skip_frames抽取的frame - - bool enable_save{false}; - std::string save_path; // frame存储为视频文件的路径 - bool save_all{false}; // 是否存储所有frame,若为false,仅存储根据skip_frames抽取的frame - - std::map conf; -}; -``` - -| 序号 | 字段 | 含义 | -| --- | -------------- | ---------------------------------------------------------------------------------------------------------------------------------- | -| 1 | `source_type` | 输入源类型,支持视频文件、摄像头、网络视频流三种,值分别为1、2、3 | -| 2 | `source_value` | 若`source_type`为视频文件,该值为指向视频文件的完整路径;若`source_type`为摄像头,该值为摄像头的index,如对于`/dev/video0`的摄像头,则index为0;若`source_type`为网络视频流,则为该视频流的完整地址。 | -| 3 | `skip_frames` | 设置跳帧,每隔skip_frames帧抽取一帧,并把该抽取帧的is_needed置为true,标记为is_needed的帧是用来做预测的帧。反之,直接跳过该帧,不经过预测。 | -| 4 | `retrieve_all` | 若置该项为true,则无论是否设置跳帧,所有的帧都会被抽取返回,以作为显示或存储用。 | -| 5 | `input_fps` | 用于抽帧前设置fps | -| 6 | `resolution` | 设置摄像头采样的分辨率,其值请参考`easyedge_video.h`中的定义,注意该分辨率调整仅对输入源为摄像头时有效 | -| 7 | `conf` | 高级选项。部分配置会通过该map来设置 | - -*** 注意:*** - -1. `VideoConfig`不支持`display`功能。如果需要使用`VideoConfig`的`display`功能,需要自行编译带有GTK选项的OpenCV。 - -2. 使用摄像头抽帧时,如果通过`resolution`设置了分辨率调整,但是不起作用,请添加如下选项: - - ``` - video_config.conf["backend"] = "2"; - ``` - -3. 部分设备上的CSI摄像头尚未兼容,如遇到问题,可以通过工单、QQ交流群或微信交流群反馈。 - -具体接口调用流程,可以参考SDK中的`demo_video_inference`。 - -# FAQ - -1. 如何处理一些 undefined reference / error while loading shared libraries? - - > 如:./easyedge_demo: error while loading shared libraries: libeasyedge.so.1: cannot open shared object file: No such file or directory - - 遇到该问题时,请找到具体的库的位置,设置LD_LIBRARY_PATH;或者安装缺少的库。 - - > 示例一:libverify.so.1: cannot open shared object file: No such file or directory - > 链接找不到libveirfy.so文件,一般可通过 export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:../../lib 解决(实际冒号后面添加的路径以libverify.so文件所在的路径为准) - - > 示例二:libopencv_videoio.so.4.5: cannot open shared object file: No such file or directory - > 链接找不到libopencv_videoio.so文件,一般可通过 export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:../../thirdparty/opencv/lib 解决(实际冒号后面添加的路径以libopencv_videoio.so所在路径为准) - - > 示例三:GLIBCXX_X.X.X not found - > 链接无法找到glibc版本,请确保系统gcc版本>=SDK的gcc版本。升级gcc/glibc可以百度搜索相关文献。 - -2. 运行二进制时,提示 libverify.so cannot open shared object file - - 可能cmake没有正确设置rpath, 可以设置LD_LIBRARY_PATH为sdk的lib文件夹后,再运行: - - ```bash - LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../lib ./easyedge_demo - ``` - -3. 
编译时报错:file format not recognized - - 可能是因为在复制SDK时文件信息丢失。请将整个压缩包复制到目标设备中,再解压缩、编译。 diff --git a/docs/ARM-Linux-CPP-SDK-Serving.md b/docs/ARM-Linux-CPP-SDK-Serving.md deleted file mode 100644 index 9c6a215cfd..0000000000 --- a/docs/ARM-Linux-CPP-SDK-Serving.md +++ /dev/null @@ -1,318 +0,0 @@ -# 简介 - -本文档介绍FastDeploy中的模型SDK,在ARM Linux C++环境下:(1)服务化推理部署步骤;(2)介绍模型推流全流程API,方便开发者了解项目后二次开发。 -其中ARM Linux Python请参考[ARM Linux Python环境下的HTTP推理部署](./ARM-Linux-Python-SDK-Serving.md)文档。 - -**注意**:部分模型(如OCR等)不支持服务化推理。 - - - -* [简介](#简介) - -* [安装准备](#安装准备) - - * [1. 硬件支持](#1-硬件支持) - * [2. 软件环境](#2-软件环境) - -* [快速开始](#快速开始) - - * [1. 项目结构说明](#1-项目结构说明) - * [2. 测试 HTTP Demo](#2-测试-http-demo) - * [2.1 启动HTTP预测服务](#21-启动http预测服务) - -* [HTTP API流程详解](#http-api流程详解) - - * [1. 开启http服务](#1-开启http服务) - * [2. 请求http服务](#2-请求http服务) - * [2.1 http 请求方式一:不使用图片base64格式](#21-http-请求方式一不使用图片base64格式) - * [2.2 http 请求方法二:使用图片base64格式](#22-http-请求方法二使用图片base64格式) - * [3. http返回数据](#3-http返回数据) - -* [FAQ](#faq) - - - -# 安装准备 - -## 1. 硬件支持 - -目前支持的ARM架构:aarch64 、armv7hf - -## 2. 软件环境 - -1.运行二进制文件-环境要求 - -* gcc: 5.4 以上 (GLIBCXX_3.4.22) - * Linux下查看gcc版本命名(可能因系统差异命令会不同):`gcc --version`; - * Linux下C++基础库GLIBCXX的命令(可能因系统差异路径会有不同,可检测自己环境下的情况):`strings /usr/lib64/libstdc++.so.6 | grep GLIBCXX` -* glibc:2.23以上 - * Linux查看命令:`ldd --version` - -2.二次开发编译-环境要求 - -编译源代码时,除了gcc、GLIBCXX、glibc满足`1.运行二进制文件-环境要求`外,还需要cmake满足要求。 - -* cmake: 3.0 以上 - - * Linux查看命令:`cmake --version` - -# 快速开始 - -## 1. 项目结构说明 - -根据开发者模型、部署芯片、操作系统需要,在图像界面[飞桨开源模型](https://ai.baidu.com/easyedge/app/openSource)或[GIthub](https://github.com/PaddlePaddle/FastDeploy)中选择对应的SDK进行下载。解压后SDK目录结构如下: - -``` -.EasyEdge-Linux-m43157-b97741-x86 -├── RES                 # 模型资源文件夹,一套模型适配不同硬件、OS和部署方式 -│ ├── conf.json        # Android、iOS系统APP名字需要 -│ ├── model # 模型结构文件 -│ ├── params # 模型参数文件 -│ ├── label_list.txt # 模型标签文件 -│ ├── infer_cfg.json # 模型前后处理等配置文件 -├── ReadMe.txt -├── cpp                 # C++ SDK 文件结构 - └── baidu_easyedge_linux_cpp_x86_64_CPU.Generic_gcc5.4_v1.4.0_20220325.tar.gz - ├── bin         # 可直接运行的二进制文件 - ├── include     # 二次开发用的头文件 - ├── lib         # 二次开发用的所依赖的库 - ├── src         # 二次开发用的示例工程 - └── thirdparty  # 第三方依赖 -└── python # Python SDK 文件 -``` - -## 2. 测试 HTTP Demo - -> 模型资源文件(即压缩包中的RES文件夹)默认已经打包在开发者下载的SDK包中,请先将tar包整体拷贝到具体运行的设备中,再解压缩使用。 - -SDK中已经包含预先编译的二进制,可直接运行。以下运行示例均是`cd cpp/bin`路径下执行的结果。 - -### 2.1 启动HTTP预测服务 - -``` -./easyedge_serving {模型RES文件夹路径} -``` - -启动后,日志中会显示如下设备IP和24401端口号信息: - -``` -HTTP is now serving at 0.0.0.0:24401 -``` - -此时,开发者可以打开浏览器,输入链接地址`http://0.0.0.0:24401`(这里的`设备IP和24401端口号`根据开发者电脑显示修改),选择图片来进行测试。 - -
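-
-除了浏览器,也可以先用 curl 在命令行快速验证服务连通性(示意命令,假设服务运行在本机 24401 端口、当前目录存在测试图片 1.jpg,threshold 等参数含义见下文接口说明):
-
-```bash
-# POST body 直接为图片二进制内容(无需 base64),threshold 为可选的阈值过滤参数
-curl -s -X POST --data-binary @1.jpg 'http://127.0.0.1:24401/?threshold=0.1'
-```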
- -同时,可以调用HTTP接口来访问服务,具体参考下文的[二次开发](#10)接口说明。 - -# HTTP API流程详解 - -本章节主要结合[2.1 HTTP Demo]()的API介绍,方便开发者学习并将运行库嵌入到开发者的程序当中,更详细的API请参考`include/easyedge/easyedge*.h`文件。http服务包含服务端和客户端,目前支持的能力包括以下几种方式,Demo中提供了不使用图片base格式的`方式一:浏览器请求的方式`,其他几种方式开发者根据个人需要,选择开发。 - -## 1. 开启http服务 - -http服务的启动可直接使用`bin/easyedge_serving`,或参考`src/demo_serving.cpp`文件修改相关逻辑 - -```cpp - /** - * @brief 开启一个简单的demo http服务。 - * 该方法会block直到收到sigint/sigterm。 - * http服务里,图片的解码运行在cpu之上,可能会降低推理速度。 - * @tparam ConfigT - * @param config - * @param host - * @param port - * @param service_id service_id user parameter, uri '/get/service_id' will respond this value with 'text/plain' - * @param instance_num 实例数量,根据内存/显存/时延要求调整 - * @return - */ - template - int start_http_server( - const ConfigT &config, - const std::string &host, - int port, - const std::string &service_id, - int instance_num = 1); -``` - -## 2. 请求http服务 - -> 开发者可以打开浏览器,`http://{设备ip}:24401`,选择图片来进行测试。 - -### 2.1 http 请求方式一:不使用图片base64格式 - -URL中的get参数: - -| 参数 | 说明 | 默认值 | -| --------- | --------- | ---------------- | -| threshold | 阈值过滤, 0~1 | 如不提供,则会使用模型的推荐阈值 | - -HTTP POST Body即为图片的二进制内容(无需base64, 无需json) - -Python请求示例 - -```Python -import requests - -with open('./1.jpg', 'rb') as f: - img = f.read() - result = requests.post( - 'http://127.0.0.1:24401/', - params={'threshold': 0.1}, - data=img).json() -``` - -### 2.2 http 请求方法二:使用图片base64格式 - -HTTP方法:POST -Header如下: - -| 参数 | 值 | -| ------------ | ---------------- | -| Content-Type | application/json | - -**Body请求填写**: - -* 分类网络: - body 中请求示例 - - ``` - { - "image": "" - "top_num": 5 - } - ``` - - body中参数详情 - -| 参数 | 是否必选 | 类型 | 可选值范围 | 说明 | -| ------- | ---- | ------ | ----- | ----------------------------------------------------------------------------------- | -| image | 是 | string | - | 图像数据,base64编码,要求base64图片编码后大小不超过4M,最短边至少15px,最长边最大4096px,支持jpg/png/bmp格式 **注意去掉头部** | -| top_num | 否 | number | - | 返回分类数量,不填该参数,则默认返回全部分类结果 | - -* 检测和分割网络: - Body请求示例: - - ``` - { - "image": "" - } - ``` - - body中参数详情: - -| 参数 | 是否必选 | 类型 | 可选值范围 | 说明 | -| --------- | ---- | ------ | ----- | ----------------------------------------------------------------------------------- | -| image | 是 | string | - | 图像数据,base64编码,要求base64图片编码后大小不超过4M,最短边至少15px,最长边最大4096px,支持jpg/png/bmp格式 **注意去掉头部** | -| threshold | 否 | number | - | 默认为推荐阈值,也可自行根据需要进行设置 | - -Python请求示例: - -```Python -import base64 -import requests -def main(): - with open("图像路径", 'rb') as f: - result = requests.post("http://{服务ip地址}:24401/", json={ - "image": base64.b64encode(f.read()).decode("utf8") - }) - # print(result.request.body) - # print(result.request.headers) - print(result.content) - -if __name__ == '__main__': - main() -``` - -## 3. 
http返回数据 - -| 字段 | 类型说明 | 其他 | -| ---------- | ------ | ------------------------------------ | -| error_code | Number | 0为成功,非0参考message获得具体错误信息 | -| results | Array | 内容为具体的识别结果。其中字段的具体含义请参考`预测图像-返回格式`一节 | -| cost_ms | Number | 预测耗时ms,不含网络交互时间 | - -返回示例 - -```json -{ - "cost_ms": 52, - "error_code": 0, - "results": [ - { - "confidence": 0.94482421875, - "index": 1, - "label": "IronMan", - "x1": 0.059185408055782318, - "x2": 0.18795496225357056, - "y1": 0.14762254059314728, - "y2": 0.52510076761245728, - "mask": "...", // 图像分割模型字段 - "trackId": 0, // 目标追踪模型字段 - }, - - ] -} -``` - -*** 关于矩形坐标 *** - -x1 * 图片宽度 = 检测框的左上角的横坐标 - -y1 * 图片高度 = 检测框的左上角的纵坐标 - -x2 * 图片宽度 = 检测框的右下角的横坐标 - -y2 * 图片高度 = 检测框的右下角的纵坐标 - -*** 关于图像分割mask *** - -``` -cv::Mat mask为图像掩码的二维数组 -{ - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 1, 1, 1, 0, 0, 0, 0}, - {0, 0, 0, 1, 1, 1, 0, 0, 0, 0}, - {0, 0, 0, 1, 1, 1, 0, 0, 0, 0}, - {0, 0, 0, 1, 1, 1, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, -} -其中1代表为目标区域,0代表非目标区域 -``` - -# FAQ - -1. 如何处理一些 undefined reference / error while loading shared libraries? - - > 如:./easyedge_demo: error while loading shared libraries: libeasyedge.so.1: cannot open shared object file: No such file or directory - - 遇到该问题时,请找到具体的库的位置,设置LD_LIBRARY_PATH;或者安装缺少的库。 - - > 示例一:libverify.so.1: cannot open shared object file: No such file or directory - > 链接找不到libveirfy.so文件,一般可通过 export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:../../lib 解决(实际冒号后面添加的路径以libverify.so文件所在的路径为准) - - > 示例二:libopencv_videoio.so.4.5: cannot open shared object file: No such file or directory - > 链接找不到libopencv_videoio.so文件,一般可通过 export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:../../thirdparty/opencv/lib 解决(实际冒号后面添加的路径以libopencv_videoio.so所在路径为准) - - > 示例三:GLIBCXX_X.X.X not found - > 链接无法找到glibc版本,请确保系统gcc版本>=SDK的gcc版本。升级gcc/glibc可以百度搜索相关文献。 - -2. 使用libcurl请求http服务时,速度明显变慢 - - 这是因为libcurl请求continue导致server等待数据的问题,添加空的header即可 - - ```bash - headers = curl_slist_append(headers, "Expect:"); - ``` - -3. 运行二进制时,提示 libverify.so cannot open shared object file - - 可能cmake没有正确设置rpath, 可以设置LD_LIBRARY_PATH为sdk的lib文件夹后,再运行: - - ```bash - LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../lib ./easyedge_demo - ``` - -4. 编译时报错:file format not recognized - - 可能是因为在复制SDK时文件信息丢失。请将整个压缩包复制到目标设备中,再解压缩、编译。 diff --git a/docs/ARM-Linux-Python-SDK-Inference.md b/docs/ARM-Linux-Python-SDK-Inference.md deleted file mode 100644 index 260be108dd..0000000000 --- a/docs/ARM-Linux-Python-SDK-Inference.md +++ /dev/null @@ -1,371 +0,0 @@ -# 简介 - -本文档以[千分类模型_MobileNetV3](https://ai.baidu.com/easyedge/app/openSource)为例,介绍FastDeploy中的模型SDK, 在**ARM Linux Python** 环境下:(1)图像推理部署步骤; (2)介绍模型推流全流程API,方便开发者了解项目后二次开发。其中ARM Linux C++请参考[ARM Linux C++环境下的推理部署](./ARM-Linux-CPP-SDK-Inference.md)文档。 - -**注意**:部分模型(如Tinypose、OCR等)仅支持图像推理,不支持视频推理。 - - - -* [简介](#简介) - -* [环境准备](#环境准备) - - * [1.SDK下载](#1sdk下载) - * [2.硬件支持](#2硬件支持) - * [3.python环境](#3python环境) - * [4.安装依赖](#4安装依赖) - * [4.1.安装paddlepaddle](#41安装paddlepaddle) - * [4.2.安装EasyEdge Python Wheel 包](#42安装easyedge-python-wheel-包) - -* [快速开始](#快速开始) - - * [1.文件结构说明](#1文件结构说明) - * [2.测试Demo](#2测试demo) - * [2.1预测图像](#21预测图像) - -* [Demo API介绍](#demo-api介绍) - - * [1.基础流程](#1基础流程) - * [2.初始化](#2初始化) - * [3.SDK参数配置](#3sdk参数配置) - * [4.预测图像](#4预测图像) - -* [FAQ](#faq) - - - -# 环境准备 - -## 1.SDK下载 - -根据开发者模型、部署芯片、操作系统需要,在图像界面[飞桨开源模型](https://ai.baidu.com/easyedge/app/openSource)或[GIthub](https://github.com/PaddlePaddle/FastDeploy)中选择对应的SDK进行下载。 - -```shell -EasyEdge-Linux-x86--[部署芯片] -├──... 
-├──python # Linux Python SDK - ├── # 特定Python版本的EasyEdge Wheel包, 二次开发可使用 - ├── BaiduAI_EasyEdge_SDK-1.3.1-cp36-cp36m-linux_aarch64.whl - ├── infer_demo # demo体验完整文件 - │ ├── demo_xxx.py # 包含前后处理的端到端推理demo文件 - │ └── demo_serving.py # 提供http服务的demo文件 - ├── tensor_demo # 学习自定义算法前后处理时使用 - │ └── demo_xxx.py -``` - -## 2.硬件支持 - -目前支持的ARM架构:aarch64 、armv7hf - -## 3.python环境 - -> ARM Linux SDK仅支持Python 3.6 - -使用如下命令获取已安装的Python版本号。如果本机的版本不匹配,建议使用[pyenv](https://github.com/pyenv/pyenv)、[anaconda](https://www.anaconda.com/)等Python版本管理工具对SDK所在目录进行配置。 - -```shell -$python3 --version -``` - -接着使用如下命令确认pip的版本是否满足要求,要求pip版本为20.2.2或更高版本。详细的pip安装过程可以参考[官网教程](https://pip.pypa.io/en/stable/installation/)。 - -```shell -$python3 -m pip --version -``` - -## 4.安装依赖 - -### 4.1.安装paddlepaddle - -根据具体的部署芯片(CPU/GPU)安装对应的PaddlePaddle的whl包。 - -`armv8 CPU平台`可以使用如下命令进行安装: - -```shell -python3 -m pip install http://aipe-easyedge-public.bj.bcebos.com/easydeploy/paddlelite-2.11-cp36-cp36m-linux_aarch64.whl -``` - -### 4.2.安装EasyEdge Python Wheel 包 - -在`python`目录下,安装特定Python版本的EasyEdge Wheel包。`armv8 CPU平台`可以使用如下命令进行安装: - -```shell -python3 -m pip install -U BaiduAI_EasyEdge_SDK-1.3.1-cp36-cp36m-linux_aarch64.whl -``` - -# 快速开始 - -## 1.文件结构说明 - -Python SDK文件结构如下: - -```shell -.EasyEdge-Linux-x86--[部署芯片] -├── RES # 模型资源文件夹,一套模型适配不同硬件、OS和部署方式 -│ ├── conf.json # Android、iOS系统APP名字需要 -│ ├── label_list.txt # 模型标签文件 -│ ├── model # 模型结构文件 -│ ├── params # 模型参数文件 -│ └── infer_cfg.json # 模型前后处理等配置文件 -├── ReadMe.txt -├── cpp # C++ SDK 文件结构 -└── python # Python SDK 文件 - ├── BaiduAI_EasyEdge_SDK-1.3.1-cp36-cp36m-linux_aarch64.whl #EasyEdge Python Wheel 包 - ├── infer_demo - ├── demo_armv8_cpu.py # 图像推理 - ├── demo_serving.py # HTTP服务化推理 - └── tensor_demo # 学习自定义算法前后处理时使用 - ├── demo_armv8_cpu.py -``` - -## 2.测试Demo - -> 模型资源文件默认已经打包在开发者下载的SDK包中, 默认为`RES`目录。 - -### 2.1预测图像 - -使用infer_demo文件夹下的demo文件。 - -```bash -python3 demo_x86_cpu.py {模型RES文件夹} {测试图片路径} -``` - -运行效果示例: - -
- -```shell -2022-06-14 14:40:16 INFO [EasyEdge] [demo_nvidia_gpu.py:38] 140518522509120: Init paddlefluid engine... -2022-06-14 14:40:20 INFO [EasyEdge] [demo_nvidia_gpu.py:38] 140518522509120: Paddle version: 2.2.2 -{'confidence': 0.9012349843978882, 'index': 8, 'label': 'n01514859 hen'} -``` - -可以看到,运行结果为`index:8,label:hen`,通过imagenet [类别映射表](https://gist.github.com/yrevar/942d3a0ac09ec9e5eb3a),可以找到对应的类别,即 'hen',由此说明我们的预测结果正确。 - -# Demo API介绍 - -本章节主要结合[测试Demo](#2测试Demo)的Demo示例介绍推理API,方便开发者学习后二次开发。 - -## 1.基础流程 - -> ❗注意,请优先参考SDK中自带demo的使用流程和说明。遇到错误,请优先参考文件中的注释、解释、日志说明。 - -`infer_demo/demo_xx_xx.py` - -```python -# 引入EasyEdge运行库 -import BaiduAI.EasyEdge as edge - -# 创建并初始化一个预测Progam;选择合适的引擎 -pred = edge.Program() -pred.init(model_dir={RES文件夹路径}, device=edge.Device.CPU, engine=edge.Engine.PADDLE_FLUID) # x86_64 CPU -# pred.init(model_dir=_model_dir, device=edge.Device.GPU, engine=edge.Engine.PADDLE_FLUID) # x86_64 Nvidia GPU -# pred.init(model_dir=_model_dir, device=edge.Device.CPU, engine=edge.Engine.PADDLE_LITE) # armv8 CPU - -# 预测图像 -res = pred.infer_image({numpy.ndarray的图片}) - -# 关闭结束预测Progam -pred.close() -``` - -`infer_demo/demo_serving.py` - -```python -import BaiduAI.EasyEdge as edge -from BaiduAI.EasyEdge.serving import Serving - -# 创建并初始化Http服务 -server = Serving(model_dir={RES文件夹路径}, license=serial_key) - -# 运行Http服务 -# 请参考同级目录下demo_xx_xx.py里: -# pred.init(model_dir=xx, device=xx, engine=xx, device_id=xx) -# 对以下参数device\device_id和engine进行修改 -server.run(host=host, port=port, device=edge.Device.CPU, engine=edge.Engine.PADDLE_FLUID) # x86_64 CPU -# server.run(host=host, port=port, device=edge.Device.GPU, engine=edge.Engine.PADDLE_FLUID) # x86_64 Nvidia GPU -# server.run(host=host, port=port, device=edge.Device.CPU, engine=edge.Engine.PADDLE_LITE) # armv8 CPU -``` - -## 2.初始化 - -* 接口 - - ```python - def init(self, - model_dir, - device=Device.CPU, - engine=Engine.PADDLE_FLUID, - config_file='conf.json', - preprocess_file='preprocess_args.json', - model_file='model', - params_file='params', - label_file='label_list.txt', - infer_cfg_file='infer_cfg.json', - device_id=0, - thread_num=1 - ): - """ - Args: - model_dir: str - device: BaiduAI.EasyEdge.Device,比如:Device.CPU - engine: BaiduAI.EasyEdge.Engine, 比如: Engine.PADDLE_FLUID - config_file: str - preprocess_file: str - model_file: str - params_file: str - label_file: str 标签文件 - infer_cfg_file: 包含预处理、后处理信息的文件 - device_id: int 设备ID - thread_num: int CPU的线程数 - - Raises: - RuntimeError, IOError - Returns: - bool: True if success - """ - ``` - -若返回不是True,请查看输出日志排查错误原因。 - -## 3.SDK参数配置 - -使用 CPU 预测时,可以通过在 init 中设置 thread_num 使用多线程预测。如: - -```python -pred.init(model_dir=_model_dir, device=edge.Device.CPU, engine=edge.Engine.PADDLE_FLUID, thread_num=4) -``` - -使用 GPU 预测时,可以通过在 init 中设置 device_id 指定需要的GPU device id。如: - -```python -pred.init(model_dir=_model_dir, device=edge.Device.GPU, engine=edge.Engine.PADDLE_FLUID, device_id=0) -``` - -## 4.预测图像 - -* 接口 - - ```python - def infer_image(self, img, - threshold=0.3, - channel_order='HWC', - color_format='BGR', - data_type='numpy'): - """ - - Args: - img: np.ndarray or bytes - threshold: float - only return result with confidence larger than threshold - channel_order: string - channel order HWC or CHW - color_format: string - color format order RGB or BGR - data_type: string - 仅在图像分割时有意义。 'numpy' or 'string' - 'numpy': 返回已解析的mask - 'string': 返回未解析的mask游程编码 - - Returns: - list - - """ - ``` - -* 返回格式: `[dict1, dict2, ...]` - -| 字段 | 类型 | 取值 | 说明 | -| ---------- | -------------------- | --------- 
| ------------------------ | -| confidence | float | 0~1 | 分类或检测的置信度 | -| label | string | | 分类或检测的类别 | -| index | number | | 分类或检测的类别 | -| x1, y1 | float | 0~1 | 物体检测,矩形的左上角坐标 (相对长宽的比例值) | -| x2, y2 | float | 0~1 | 物体检测,矩形的右下角坐标(相对长宽的比例值) | -| mask | string/numpy.ndarray | 图像分割的mask | | - -***关于矩形坐标*** - -x1 * 图片宽度 = 检测框的左上角的横坐标 - -y1 * 图片高度 = 检测框的左上角的纵坐标 - -x2 * 图片宽度 = 检测框的右下角的横坐标 - -y2 * 图片高度 = 检测框的右下角的纵坐标 - -可以参考 demo 文件中使用 opencv 绘制矩形的逻辑。 - -***结果示例*** - - i) 图像分类 - -```json -{ - "index": 736, - "label": "table", - "confidence": 0.9 -} -``` - - ii) 物体检测 - -```json -{ - "index": 8, - "label": "cat", - "confidence": 1.0, - "x1": 0.21289, - "y1": 0.12671, - "x2": 0.91504, - "y2": 0.91211, -} -``` - - iii) 图像分割 - -```json -{ - "name": "cat", - "score": 1.0, - "location": { - "left": ..., - "top": ..., - "width": ..., - "height": ..., - }, - "mask": ... -} -``` - -mask字段中,data_type为`numpy`时,返回图像掩码的二维数组 - -``` -{ - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 1, 1, 1, 0, 0, 0, 0}, - {0, 0, 0, 1, 1, 1, 0, 0, 0, 0}, - {0, 0, 0, 1, 1, 1, 0, 0, 0, 0}, - {0, 0, 0, 1, 1, 1, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, -} -其中1代表为目标区域,0代表非目标区域 -``` - -data_type为`string`时,mask的游程编码,解析方式可参考 [demo](https://github.com/Baidu-AIP/EasyDL-Segmentation-Demo) - -# FAQ - -1.执行infer_demo文件时,提示your generated code is out of date and must be regenerated with protoc >= 3.19.0 - - 进入当前项目,首先卸载protobuf - - ```shell - python3 -m pip uninstall protobuf - ``` - - 安装低版本protobuf - - ```shell - python3 -m pip install protobuf==3.19.0 - ``` diff --git a/docs/ARM-Linux-Python-SDK-Serving.md b/docs/ARM-Linux-Python-SDK-Serving.md deleted file mode 100644 index aa37e7e405..0000000000 --- a/docs/ARM-Linux-Python-SDK-Serving.md +++ /dev/null @@ -1,266 +0,0 @@ -# 简介 - -本文档以[千分类模型_MobileNetV3](https://ai.baidu.com/easyedge/app/openSource)为例,介绍FastDeploy中的模型SDK, 在**ARM Linux Python** 环境下: (1)**服务化**推理部署步骤; (2)介绍模型推流全流程API,方便开发者了解项目后二次开发。其中ARM Linux Python请参考[ARM Linux C++环境下的HTTP推理部署](./ARM-Linux-CPP-SDK-Serving.md)文档。 - -**注意**:部分模型(如OCR等)不支持服务化推理。 - - - -* [简介](#简介) - -* [环境准备](#环境准备) - - * [1.SDK下载](#1sdk下载) - * [2.硬件支持](#2硬件支持) - * [3.Python环境](#3python环境) - * [4.安装依赖](#4安装依赖) - * [4.1.安装paddlepaddle](#41安装paddlepaddle) - * [4.2.安装EasyEdge Python Wheel 包](#42安装easyedge-python-wheel-包) - -* [快速开始](#快速开始) - - * [1.文件结构说明](#1文件结构说明) - * [2.测试Serving服务](#2测试serving服务) - * [2.1 启动HTTP预测服务](#21-启动http预测服务) - -* [HTTP API流程详解](#http-api流程详解) - - * [1. 开启http服务](#1-开启http服务) - * [2. 请求http服务](#2-请求http服务) - * [2.1 http 请求方式:不使用图片base64格式](#21-http-请求方式不使用图片base64格式) - * [3. 
http返回数据](#3-http返回数据) - -* [FAQ](#faq) - - - -# 环境准备 - -## 1.SDK下载 - -根据开发者模型、部署芯片、操作系统需要,在图像界面[飞桨开源模型](https://ai.baidu.com/easyedge/app/openSource)或[GIthub](https://github.com/PaddlePaddle/FastDeploy)中选择对应的SDK进行下载。解压缩后的文件结构如下。 - -```shell -EasyEdge-Linux-x86-[部署芯片] -├── RES # 模型文件资源文件夹,可替换为其他模型 -├── README.md -├── cpp # C++ SDK -└── python # Python SDK -``` - -## 2.硬件支持 - -目前支持的ARM架构:aarch64 、armv7hf - -## 3.Python环境 - -> ARM Linux SDK仅支持Python 3.6 - -使用如下命令获取已安装的Python版本号。如果本机的版本不匹配,需要根据ARM Linux下Python安装方式进行安装。(不建议在ARM Linux下使用conda,因为ARM Linux场景通常资源很有限) - -```shell -$python3 --version -``` - -接着使用如下命令确认pip的版本是否满足要求,要求pip版本为20.2.2或更高版本。详细的pip安装过程可以参考[官网教程](https://pip.pypa.io/en/stable/installation/)。 - -```shell -$python3 -m pip --version -``` - -## 4.安装依赖 - -### 4.1.安装paddlepaddle - -根据具体的部署芯片(CPU/GPU)安装对应的PaddlePaddle的whl包。 - -`armv8 CPU平台`可以使用如下命令进行安装: - -```shell -python3 -m pip install http://aipe-easyedge-public.bj.bcebos.com/easydeploy/paddlelite-2.11-cp36-cp36m-linux_aarch64.whl -``` - -### 4.2.安装EasyEdge Python Wheel 包 - -在`python`目录下,安装特定Python版本的EasyEdge Wheel包。`armv8 CPU平台`可以使用如下命令进行安装: - -```shell -python3 -m pip install -U BaiduAI_EasyEdge_SDK-1.3.1-cp36-cp36m-linux_aarch64.whl -``` - -# 二.快速开始 - -## 1.文件结构说明 - -Python SDK文件结构如下: - -```shell -EasyEdge-Linux-x86--[部署芯片] -├──... -├──python # Linux Python SDK - ├── # 特定Python版本的EasyEdge Wheel包, 二次开发可使用 - ├── BBaiduAI_EasyEdge_SDK-1.3.1-cp36-cp36m-linux_aarch64.whl - ├── infer_demo # demo体验完整文件 - │ ├── demo_xxx.py # 包含前后处理的端到端推理demo文件 - │ └── demo_serving.py # 提供http服务的demo文件 - ├── tensor_demo # 学习自定义算法前后处理时使用 - │ └── demo_xxx.py -``` - -## 2.测试Serving服务 - -> 模型资源文件默认已经打包在开发者下载的SDK包中, 默认为`RES`目录。 - -### 2.1 启动HTTP预测服务 - -指定对应的模型文件夹(默认为`RES`)、设备ip和指定端口号,运行如下命令。 - -```shell -python3 demo_serving.py {模型RES文件夹} {host, default 0.0.0.0} {port, default 24401} -``` - -成功启动后,终端中会显示如下字样。 - -```shell -... -* Running on {host ip}:24401 -``` - -如果是在局域网内的机器上部署,开发者此时可以打开浏览器,输入`http://{host ip}:24401`,选择图片来进行测试,运行效果如下。 - - - -如果是在远程机器上部署,那么可以参考`demo_serving.py`中的 `http_client_test()函数`请求http服务来执行推理。 - -# 三. HTTP API流程详解 - -## 1. 开启http服务 - -http服务的启动使用`demo_serving.py`文件 - -```python -class Serving(object): - """ - SDK local serving - """ - - def __init__(self, model_dir, license='', model_filename='model', params_filename='params'): - - self.program = None - self.model_dir = model_dir - self.model_filename = model_filename - self.params_filename = params_filename - self.program_lock = threading.Lock() - self.license_key = license - # 只有ObjectTracking会初始化video_processor - self.video_processor = None - - def run(self, host, port, device, engine=Engine.PADDLE_FLUID, service_id=0, device_id=0, **kwargs): - """ - Args: - host : str - port : str - device : BaiduAI.EasyEdge.Device,比如:Device.CPU - engine : BaiduAI.EasyEdge.Engine, 比如: Engine.PADDLE_FLUID - """ - self.run_serving_with_flask(host, port, device, engine, service_id, device_id, **kwargs) -``` - -## 2. 请求http服务 - -> 开发者可以打开浏览器,`http://{设备ip}:24401`,选择图片来进行测试。 - -### 2.1 http 请求方式:不使用图片base64格式 - -URL中的get参数: - -| 参数 | 说明 | 默认值 | -| --------- | --------- | ---------------- | -| threshold | 阈值过滤, 0~1 | 如不提供,则会使用模型的推荐阈值 | - -HTTP POST Body即为图片的二进制内容 - -Python请求示例 - -```python -import requests - -with open('./1.jpg', 'rb') as f: - img = f.read() - result = requests.post( - 'http://127.0.0.1:24401/', - params={'threshold': 0.1}, - data=img).json() -``` - -## 3. 
http返回数据 - -| 字段 | 类型说明 | 其他 | -| ---------- | ------ | ------------------------------------ | -| error_code | Number | 0为成功,非0参考message获得具体错误信息 | -| results | Array | 内容为具体的识别结果。其中字段的具体含义请参考`预测图像-返回格式`一节 | -| cost_ms | Number | 预测耗时ms,不含网络交互时间 | - -返回示例 - -```json -{ - "cost_ms": 52, - "error_code": 0, - "results": [ - { - "confidence": 0.94482421875, - "index": 1, - "label": "IronMan", - "x1": 0.059185408055782318, - "x2": 0.18795496225357056, - "y1": 0.14762254059314728, - "y2": 0.52510076761245728, - "mask": "...", // 图像分割模型字段 - "trackId": 0, // 目标追踪模型字段 - }, - - ] -} -``` - -***关于矩形坐标*** - -x1 * 图片宽度 = 检测框的左上角的横坐标 - -y1 * 图片高度 = 检测框的左上角的纵坐标 - -x2 * 图片宽度 = 检测框的右下角的横坐标 - -y2 * 图片高度 = 检测框的右下角的纵坐标 - -*** 关于图像分割mask *** - -``` -cv::Mat mask为图像掩码的二维数组 -{ - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 1, 1, 1, 0, 0, 0, 0}, - {0, 0, 0, 1, 1, 1, 0, 0, 0, 0}, - {0, 0, 0, 1, 1, 1, 0, 0, 0, 0}, - {0, 0, 0, 1, 1, 1, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, -} -其中1代表为目标区域,0代表非目标区域 -``` - -# FAQ - -1.执行infer_demo文件时,提示your generated code is out of date and must be regenerated with protoc >= 3.19.0 - - 进入当前项目,首先卸载protobuf - - ```shell - python3 -m pip uninstall protobuf - ``` - - 安装低版本protobuf - - ```shell - python3 -m pip install protobuf==3.19.0 - ``` diff --git a/docs/Android-SDK.md b/docs/Android-SDK.md deleted file mode 100644 index 2a2abbc560..0000000000 --- a/docs/Android-SDK.md +++ /dev/null @@ -1,404 +0,0 @@ -# 简介 - -本文档介绍FastDeploy中的模型SDK,在Android环境下:(1)推理操作步骤;(2)介绍模型SDK使用说明,方便开发者了解项目后二次开发。 - - - -* [简介](#简介) - -* [系统支持说明](#系统支持说明) - -* [快速开始](#快速开始) - - * [1. 项目结构说明](#1-项目结构说明) - * [2. APP 标准版测试](#2-app-标准版测试) - * [2.1 扫码体验](#21-扫码体验) - * [2.2 源码运行](#22-源码运行) - * [3. 精简版测试](#3-精简版测试) - -* [SDK使用说明](#sdk使用说明) - - * [1. 集成指南](#1-集成指南) - * [1.1 依赖库集成](#11-依赖库集成) - * [1.2 添加权限](#12-添加权限) - * [1.3 混淆规则(可选)](#13-混淆规则可选) - * [2. API调用流程示例](#2-api调用流程示例) - * [2.1 初始化](#21-初始化) - * [2.2 预测图像](#22-预测图像) - -* [错误码](#错误码) - - - -# 系统支持说明 - -1. Android 版本支持范围:Android 5.0(API21)<= Android < Android 10(API 29)。 - -2. 硬件支持情况:支持 arm64-v8a 和 armeabi-v7a,暂不支持模拟器。 -* 官网测试机型:红米k30,Vivo v1981a,华为oxp-an00,华为cdy-an90,华为pct-al10,荣耀yal-al00,OPPO Reno5 Pro 5G -3. 其他说明 -* 【图像分割类算法】(1)图像分割类算法,暂未提供实时摄像头推理功能,开发者可根据自己需要,进行安卓开发;(2)PP-Humanseg-Lite模型设计初衷为横屏视频会议等场景,本次安卓SDK仅支持竖屏场景,开发者可根据自己需要,开发横屏功能。 -* 【OCR模型】OCR任务第一次启动任务,第一张推理时间久,属于正常情况(因为涉及到模型加载、预处理等工作)。 - -> 预测图像时运行内存不能过小,一般大于模型资源文件夹大小的3倍。 - -# 快速开始 - -## 1. 项目结构说明 - -根据开发者模型、部署芯片、操作系统需要,在图像界面[飞桨开源模型](https://ai.baidu.com/easyedge/app/openSource)或[GIthub](https://github.com/PaddlePaddle/FastDeploy)中选择对应的SDK进行下载。SDK目录结构如下: - -``` -.EasyEdge-Android-SDK -├── app -│ ├── src/main -│ │ ├── assets -│ │ │ ├── demo -│ │ │ │ └── conf.json # APP名字 -│ │ │ ├── infer # 模型资源文件夹,一套模型适配不同硬件、OS和部署方式 -│ │ │ │ ├── model # 模型结构文件 -│ │ │ │ ├── params # 模型参数文件 -│ │ │ │ ├── label_list.txt # 模型标签文件 -│ │ │ │ └── infer_cfg.json # 模型前后处理等配置文件 -│ │ ├── java/com.baidu.ai.edge/demo -│ │ │ ├── infertest # 通用ARM精简版测试 -│ │ │ │ ├── TestInferClassifyTask.java # 图像分类 -│ │ │ │ ├── TestInferDetectionTask.java # 物体检测 -│ │ │ │ ├── TestInferSegmentTask.java # 实例分割 -│ │ │ │ ├── TestInferPoseTask.java # 姿态估计 -│ │ │ │ ├── TestInferOcrTask.java # OCR -│ │ │ │ └── MainActivity.java # 精简版启动 Activity -│ │ │ ├── MainActivity.java # Demo APP 启动 Activity -│ │ │ ├── CameraActivity.java # 摄像头UI逻辑 -│ │ │ └── ... -│ │ └── ... -│ ├── libs -│ │ ├── armeabi-v7a # v7a的依赖库 -│ │ ├── arm64-v8a # v8a的依赖库 -│ │ └── easyedge-sdk.jar # jar文件 -│ └── ... -├── camera_ui # UI模块,包含相机逻辑 -├── README.md -└── ... 
# 其他 gradle 等工程文件 -``` - -## 2. APP 标准版测试 - -考虑部分Android开发板没有摄像头,因此本项目开发了标准版和精简版两种。标准版会调用Android系统的摄像头,采集摄像头来进行AI模型推理;精简版在没有摄像头的开发板上运行,需要开发者准备图像。开发者根据硬件情况,选择对应的版本。 - -### 2.1 扫码体验 - -扫描二维码(二维码见下载网页`体验Demo`),无需任何依赖,手机上下载即可直接体验。 - -
- -### 2.2 源码运行 - -(1)下载对应的SDK,解压工程。
-
-(2)打开Android Studio,点击 "Import Project...",即 File -> New -> "Import Project...",选择解压后的目录。
-(3)手机连接Android Studio,并打开开发者模式。(不了解开发者模式的开发者,可在浏览器搜索开启方法)
-(4)此时点击运行按钮,手机上即会安装新的APP,运行效果与扫码体验版一致;也可使用下文的命令行安装方式。
- -
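-
-除了在 Android Studio 中点击运行按钮,习惯命令行的开发者也可以直接安装编译产物(示意命令,假设工程为标准 Gradle 结构、手机已开启 USB 调试并能被 adb 识别):
-
-```bash
-# 编译并安装 debug 版本的 APP 到已连接的手机
-./gradlew installDebug
-# 或对已编译出的 apk 手动安装(-r 表示覆盖安装,产物路径以实际工程输出为准)
-adb install -r app/build/outputs/apk/debug/app-debug.apk
-```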
- -## 3. 精简版测试 - -* 考虑部分Android开发板没有摄像头,本项目提供了精简版本,精简版忽略摄像头等UI逻辑,可兼容如无摄像头的开发板测试。 - -* 精简版对应的测试图像路径,在代码`src/main/java/com.baidu.ai.edge/demo/TestInfer*.java`中进行了设置,开发者可以准备图像到对应路径测试,也可以修改java代码测试。 - -* 支持以下硬件环境的精简版测试:通用ARM:图像分类、物体检测、实例分割、姿态估计、文字识别。 - -示例代码位于 app 模块下 infertest 目录,修改 app/src/main/AndroidManifest.xml 中的启动 Activity 开启测试。 -修改前: - -``` - - - - infertest.MainActivity - - - - - -``` - -修改后: - -``` - - - - - - - -``` - -注意:修改后,因为没有测试数据,需要开发者准备一张测试图像,放到 `app/src/main/asserts/` 路径下,并按照`app/src/main/java/com/baidu/ai/edge/demo/infertest/TestInfer*.java`中的图像命名要求对图像进行命名。 - -
- -| Demo APP 检测模型运行示例 | 精简版检测模型运行示例 | -| --------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------- | -| ![Demo APP](https://user-images.githubusercontent.com/54695910/175855181-595fd449-7351-4ec6-a3b8-68c021b152f6.jpeg) | ![精简版](https://user-images.githubusercontent.com/54695910/175855176-075f0c8a-b05d-4d60-a2a1-3f0204c6386e.jpeg) | -
- -# SDK使用说明 - -本节介绍如何将 SDK 接入开发者的项目中使用。 - -## 1. 集成指南 - -步骤一:依赖库集成 -步骤二:添加必要权限 -步骤三:混淆配置(可选) - -### 1.1 依赖库集成 - -A. 项目中未集成其他 jar 包和 so 文件: - -``` -// 1. 复制 app/libs 至项目的 app/libs 目录 -// 2. 参考 app/build.gradle 配置 NDK 可用架构和 so 依赖库目录 - -android { - ... - defaultConfig { - ndk { - abiFilters 'armeabi-v7a', 'arm64-v8a' - } - } - sourceSets { - main { - jniLibs.srcDirs = ['libs'] - } - } -} -``` - -B. 项目中已集成其他 jar 包,未集成 so 文件: - -``` -// 1. 复制 app/libs/easyedge-sdk.jar 与其他 jar 包同目录 -// 2. 复制 app/libs 下 armeabi-v7a 和 arm64-v8a 目录至 app/src/main/jniLibs 目录下 -// 3. 参考 app/build.gradle 配置 NDK 可用架构 - -android { - ... - defaultConfig { - ndk { - abiFilters 'armeabi-v7a', 'arm64-v8a' - } - } -} -``` - -C. 项目中已集成其他 jar 包和 so 文件: - -``` -// 1. 复制 app/libs/easyedge-sdk.jar 与其他 jar 包同目录 -// 2. 融合 app/libs 下 armeabi-v7a 和 arm64-v8a 下的 so 文件与其他同架构 so 文件同目录 -// 3. 参考 app/build.gradle 配置 NDK 可用架构 - -android { - ... - defaultConfig { - ndk { - abiFilters 'armeabi-v7a', 'arm64-v8a' // 只支持 v7a 和 v8a 两种架构,有其他架构需删除 - } - } -} -``` - -### 1.2 添加权限 - -参考 app/src/main/AndroidManifest.xml 中配置的权限。 - -``` - - - -``` - -### 1.3 混淆规则(可选) - -请不要混淆 jar 包文件,参考 app/proguard-rules.pro 配置。 - -``` --keep class com.baidu.ai.edge.core.*.*{ *; } -``` - -## 2. API调用流程示例 - -以通用ARM的图像分类预测流程为例,详细说明请参考后续章节: - -``` -try { - // step 1-1: 准备配置类 - InferConfig config = new InferConfig(context.getAssets(), "infer"); - - // step 1-2: 准备预测 Manager - InferManager manager = new InferManager(context, config, ""); - - // step 2-1: 准备待预测的图像,必须为 Bitmap.Config.ARGB_8888 格式,一般为默认格式 - Bitmap image = getFromSomeWhere(); - - // step 2-2: 预测图像 - List results = manager.classify(image, 0.3f); - - // step 3: 解析结果 - for (ClassificationResultModel resultModel : results) { - Log.i(TAG, "labelIndex=" + resultModel.getLabelIndex() - + ", labelName=" + resultModel.getLabel() - + ", confidence=" + resultModel.getConfidence()); - } - - // step 4: 释放资源。预测完毕请及时释放资源 - manager.destroy(); -} catch (Exception e) { - Log.e(TAG, e.getMessage()); -} -``` - -### 2.1 初始化 - -**准备配置类** -芯片与配置类对应关系: - -- 通用ARM:InferConfig - -``` -// 示例 -// 参数二为芯片对应的模型资源文件夹名称 -InferConfig config = new InferConfig(context.getAssets(), "infer"); -``` - -**准备预测 Manager** -芯片与 Manager 对应关系: - -- 通用ARM:InferManager - -``` -// 示例 -// 参数二为配置类对象 -// 参数三保持空字符串即可 -InferManager manager = new InferManager(context, config, ""); -``` - -> **注意** -> -> 1. 同一时刻只能有且唯一有效的 Manager,若要新建一个 Manager,之前创建的 Manager 需先调用 destroy() 销毁; -> 2. Manager 的任何方法都不能在 UI 线程调用; -> 3. 
Manager 的任何成员变量及方法由于线程同步问题,都必须在同一个线程中执行; - -### 2.2 预测图像 - -本节介绍各种模型类型的预测函数及结果解析。 - -> **注意** -> 预测函数可以多次调用,但必须在同一个线程中,不支持并发 -> 预测函数中的 confidence 非必需,默认使用模型推荐值。填 0 可返回所有结果 -> 待预测的图像必须为 Bitmap.Config.ARGB_8888 格式的 Bitmap - -**图像分类** - -``` -// 预测函数 -List classify(Bitmap bitmap) throws BaseException; -List classify(Bitmap bitmap, float confidence) throws BaseException; - -// 返回结果 -ClassificationResultModel -- label: 分类标签,定义在label_list.txt中 -- labelIndex: 分类标签对应的序号 -- confidence: 置信度,0-1 -``` - -**物体检测** - -``` -// 预测函数 -List detect(Bitmap bitmap) throws BaseException; -List detect(Bitmap bitmap, float confidence) throws BaseException; - -// 返回结果 -DetectionResultModel -- label: 标签,定义在label_list.txt中 -- confidence: 置信度,0-1 -- bounds: Rect,包含左上角和右下角坐标,指示物体在图像中的位置 -``` - -**实例分割** - -``` -// 预测函数 -List segment(Bitmap bitmap) throws BaseException; -List segment(Bitmap bitmap, float confidence) throws BaseException; - -// 返回结果 -SegmentationResultModel -- label: 标签,定义在label_list.txt中 -- confidence: 置信度,0-1 -- lableIndex: 标签对应的序号 -- box: Rect,指示物体在图像中的位置 -- mask: byte[],表示原图大小的0,1掩码,绘制1的像素即可得到当前对象区域 -- maskLEcode: mask的游程编码 -``` - -> 关于 maskLEcode 的解析方式可参考 [http demo](https://github.com/Baidu-AIP/EasyDL-Segmentation-Demo) - -**姿态估计** - -``` -// 预测函数 -List pose(Bitmap bitmap) throws BaseException; - -// 返回结果 -PoseResultModel -- label: 标签,定义在label_list.txt中 -- confidence: 置信度,0-1 -- points: Pair, 2个点构成一条线 -``` - -**文字识别** - -``` -// 预测函数 -List ocr(Bitmap bitmap) throws BaseException; -List ocr(Bitmap bitmap, float confidence) throws BaseException; - -// 返回结果 -OcrResultModel -- label: 识别出的文字 -- confidence: 置信度,0-1 -- points: List, 文字所在区域的点位 -``` - -# 错误码 - -| 错误码 | 错误描述 | 详细描述及解决方法 | -| ---- | ------------------------------ | ------------------------------------------------------------------------------------ | -| 1001 | assets 目录下用户指定的配置文件不存在 | SDK可以使用assets目录下config.json作为配置文件。如果传入的config.json不在assets目录下,则有此报错 | -| 1002 | 用户传入的配置文件作为json解析格式不准确,如缺少某些字段 | 正常情况下,demo中的config.json不要修改 | -| 19xx | Sdk内部错误 | 请与百度人员联系 | -| 2001 | XxxxMANAGER 只允许一个实例 | 如已有XxxxMANAGER对象,请调用destory方法 | -| 2002 | XxxxMANAGER 已经调用过destory方法 | 在一个已经调用destory方法的DETECT_MANAGER对象上,不允许再调用任何方法 | -| 2003 | 传入的assets下模型文件路径为null | XxxxConfig.getModelFileAssetPath() 返回为null。由setModelFileAssetPath(null)导致 | -| 2011 | libedge-xxxx.so 加载失败 | System.loadLibrary("edge-xxxx"); libedge-xxxx.so 没有在apk中。CPU架构仅支持armeabi-v7a arm-v8a | -| 2012 | JNI内存错误 | heap的内存不够 | -| 2103 | license过期 | license失效或者系统时间有异常 | -| 2601 | assets 目录下模型文件打开失败 | 请根据报错信息检查模型文件是否存在 | -| 2611 | 检测图片时,传递至引擎的图片二进制与长宽不符合 | 具体见报错信息 | -| 27xx | Sdk内部错误 | 请与百度人员联系 | -| 28xx | 引擎内部错误 | 请与百度人员联系 | -| 29xx | Sdk内部错误 | 请与百度人员联系 | -| 3000 | so加载错误 | 请确认所有so文件存在于apk中 | -| 3001 | 模型加载错误 | 请确认模型放置于能被加载到的合法路径中,并确保config.json配置正确 | -| 3002 | 模型卸载错误 | 请与百度人员联系 | -| 3003 | 调用模型错误 | 在模型未加载正确或者so库未加载正确的情况下调用了分类接口 | -| 50xx | 在线模式调用异常 | 请与百度人员联系 | diff --git a/docs/Jetson-Linux-CPP-SDK-Inference.md b/docs/Jetson-Linux-CPP-SDK-Inference.md deleted file mode 100644 index 512634a1bd..0000000000 --- a/docs/Jetson-Linux-CPP-SDK-Inference.md +++ /dev/null @@ -1,382 +0,0 @@ -# 简介 - -本文档介绍FastDeploy中的模型SDK, 在**Jetson Linux C++** 环境下:(1) 图像和视频 推理部署步骤, (2)介绍推理全流程API,方便开发者了解项目后二次开发。如果开发者对Jetson的服务化部署感兴趣,可以参考[Jetson CPP Serving](./Jetson-Linux-CPP-SDK-Serving.md)文档。 - -**注意**:OCR目前只支持**图像**推理部署。 - - - -* [简介](#简介) - -* [环境要求](#环境要求) - -* [快速开始](#快速开始) - - * [1. 项目结构说明](#1-项目结构说明) - * [2. 测试Demo](#2-测试demo) - * [2.1 预测图像](#21-预测图像) - * [2.2 预测视频流](#22-预测视频流) - -* [预测API流程详解](#预测api流程详解) - - * [1. 
SDK参数运行配置](#1-sdk参数运行配置) - * [2. 初始化Predictor](#2-初始化predictor) - * [3. 预测推理](#3-预测推理) - * [3.1 预测图像](#31-预测图像) - * [3.2 预测视频](#32-预测视频) - -* [FAQ](#faq) - - - -# 环境要求 - -* Jetpack: 4.6,安装Jetpack,参考[NVIDIA 官网-Jetpack4.6安装指南](https://developer.nvidia.com/jetpack-sdk-46),或者参考采购的硬件厂商提供的安装方式进行安装。![]() - - | 序号 | 硬件 | Jetpack安装方式 | 下载链接 | ---- | - | --- | ---------------- | ------------------ | ------------------------------------------------------------------------------------------------------------------------------------------- | ---- | - | 1 | Jetson Xavier NX | SD Card Image | [Download SD Card Image](https://developer.nvidia.com/embedded/l4t/r32_release_v6.1/jetson_xavier_nx/jetson-nx-jp46-sd-card-image.zip) | ---- | - | 2 | Jetson Nano | SD Card Image | [Download SD Card Image](https://developer.nvidia.com/embedded/l4t/r32_release_v6.1/jeston_nano/jetson-nano-jp46-sd-card-image.zip) | ---- | - | 3 | Jetson Nano 2GB | SD Card Image | [Download SD Card Image](https://developer.nvidia.com/embedded/l4t/r32_release_v6.1/jeston_nano_2gb/jetson-nano-2gb-jp46-sd-card-image.zip) | ---- | - | 4 | agx xavier等 | NVIDIA SDK Manager | [Download NVIDIA SDK](https://developer.nvidia.com/nvsdk-manager) | ---- | - | 5 | 非官方版本,如emmc版 | 参考采购的硬件公司提供的安装指南 | ---- | ---- | - - 注意:本项目SDK要求 `CUDA=10.2`、`cuDNN=8.2`、`TensorRT=8.0`、`gcc>=7.5` 、`cmake 在 3.0以上` ,安装 Jetpack4.6系统包后,CUDA、cuDNN、TensorRT、gcc和cmake版本就已经满足要求,无需在进行安装。 - -# 快速开始 - -## 1. 项目结构说明 - -根据开发者模型、部署芯片、操作系统需要,在图像界面[飞桨开源模型](https://ai.baidu.com/easyedge/app/openSource)或[GIthub](https://github.com/PaddlePaddle/FastDeploy)中选择对应的SDK进行下载。解压后SDK目录结构如下: - -``` -.EasyEdge-Linux-硬件芯片 -├── RES # 模型资源文件夹,一套模型适配不同硬件、OS和部署方式 -│ ├── conf.json        # Android、iOS系统APP名字需要 -│ ├── model # 模型结构文件 -│ ├── params # 模型参数文件 -│ ├── label_list.txt # 模型标签文件 -│ ├── infer_cfg.json # 模型前后处理等配置文件 -├── ReadMe.txt -├── cpp # C++ SDK 文件结构 - └── baidu_easyedge_linux_cpp_x86_64_CPU.Generic_gcc5.4_v1.4.0_20220325.tar.gz - ├── ReadMe.txt - ├── bin # 可直接运行的二进制文件 - ├── include # 二次开发用的头文件 - ├── lib # 二次开发用的所依赖的库 - ├── src # 二次开发用的示例工程 - └── thirdparty # 第三方依赖 -``` - -## 2. 测试Demo - -> 模型资源文件(即压缩包中的RES文件夹)默认已经打包在开发者下载的SDK包中,请先将tar包整体拷贝到具体运行的设备中,再解压缩使用。 - -SDK中已经包含预先编译的二进制,可直接运行。以下运行示例均是`cd cpp/bin`路径下执行的结果。 - -### 2.1 预测图像 - -```bash -./easyedge_image_inference {模型RES文件夹路径} {测试图片路径} -``` - -运行效果示例: - -
- -```bash - > ./easyedge_image_inference ../../../../RES 2.jpeg -2019-02-13 16:46:12,659 INFO [EasyEdge] [easyedge.cpp:34] 140606189016192 Baidu EasyEdge Linux Development Kit 0.2.1(20190213) -2019-02-13 16:46:14,083 INFO [EasyEdge] [paddlev2_edge_predictor.cpp:60] 140606189016192 Allocate graph success. -2019-02-13 16:46:14,326 DEBUG [EasyEdge] [paddlev2_edge_predictor.cpp:143] 140606189016192 Inference costs 168 ms -1, 1:txt_frame, p:0.994905 loc: 0.168161, 0.153654, 0.920856, 0.779621 -Done -``` - -### 2.2 预测视频流 - -``` -./easyedge_video_inference {模型RES文件夹路径} {video_type} {video_src_path} -``` - -其中 video_type 支持三种: - -``` - video_type : 1 // 本地视频文件 - video_type : 2 // 摄像头的index - video_type : 3 // 网络视频流 -``` - -video_src_path: 为 video_type 数值所对应的本地视频路径 、本地摄像头id、网络视频流地址,如: - -``` - 本地视频文件: ./easyedge_video_inference {模型RES文件夹路径} 1 ~/my_video_file.mp4 - 本地摄像头: ./easyedge_video_inference {模型RES文件夹路径} 2 1 #/dev/video1 - 网络视频流: ./easyedge_video_inference {模型RES文件夹路径} 3 rtmp://192.168.x.x:8733/live/src -``` - -注:以上路径是假模拟路径,开发者需要根据自己实际图像/视频,准备测试图像,并填写正确的测试路径。 - -# 预测API流程详解 - -本章节主要结合[2.测试Demo](#4)的Demo示例介绍推理API,方便开发者学习并将运行库嵌入到开发者的程序当中,更详细的API请参考`include/easyedge/easyedge*.h`文件。图像、视频的推理包含以下3个API,如代码step注释所示: - -> ❗注意:
-> (1)`src`文件夹中包含完整可编译的cmake工程实例,建议开发者先行了解[cmake工程基本知识](https://cmake.org/cmake/help/latest/guide/tutorial/index.html)。
-> (2)请优先参考SDK中自带的Demo工程的使用流程和说明。遇到错误,请优先参考文件中的注释、解释、日志说明。 - -```cpp - // step 1: SDK配置运行参数 - EdgePredictorConfig config; - config.model_dir = {模型文件目录}; - - // step 2: 创建并初始化Predictor;这这里选择合适的引擎 - auto predictor = global_controller()->CreateEdgePredictor(config); - - // step 3-1: 预测图像 - auto img = cv::imread({图片路径}); - std::vector results; - predictor->infer(img, results); - - // step 3-2: 预测视频 - std::vector results; - FrameTensor frame_tensor; - VideoConfig video_config; - video_config.source_type = static_cast(video_type); // source_type 定义参考头文件 easyedge_video.h - video_config.source_value = video_src; - /* - ... more video_configs, 根据需要配置video_config的各选项 - */ - auto video_decoding = CreateVideoDecoding(video_config); - while (video_decoding->next(frame_tensor) == EDGE_OK) { - results.clear(); - if (frame_tensor.is_needed) { - predictor->infer(frame_tensor.frame, results); - render(frame_tensor.frame, results, predictor->model_info().kind); - } - //video_decoding->display(frame_tensor); // 显示当前frame,需在video_config中开启配置 - //video_decoding->save(frame_tensor); // 存储当前frame到视频,需在video_config中开启配置 - } -``` - -若需自定义library search path或者gcc路径,修改对应Demo工程下的CMakeList.txt即可。 - -## 1. SDK参数运行配置 - -SDK的参数通过`EdgePredictorConfig::set_config`和`global_controller()->set_config`配置。本Demo 中设置了模型路径,其他参数保留默认参数。更详细的支持运行参数等,可以参考开发工具包中的头文件(`include/easyedge/easyedge_xxxx_config.h`)的详细说明。 - -配置参数使用方法如下: - -``` -EdgePredictorConfig config; -config.model_dir = {模型文件目录}; -``` - -## 2. 初始化Predictor - -* 接口 - - ```cpp - auto predictor = global_controller()->CreateEdgePredictor(config); - predictor->init(); - ``` - -若返回非0,请查看输出日志排查错误原因。 - -## 3. 预测推理 - -### 3.1 预测图像 - -> 在Demo中展示了预测接口infer()传入cv::Mat& image图像内容,并将推理结果赋值给std::vector& result。更多关于infer()的使用,可以根据参考`easyedge.h`头文件中的实际情况、参数说明自行传入需要的内容做推理 - -* 接口输入 - -```cpp - /** - * @brief - * 通用接口 - * @param image: must be BGR , HWC format (opencv default) - * @param result - * @return - */ - virtual int infer(cv::Mat& image, std::vector& result) = 0; -``` - - 图片的格式务必为opencv默认的BGR, HWC格式。 - -* 接口返回 - - `EdgeResultData`中可以获取对应的分类信息、位置信息。 - -```cpp -struct EdgeResultData { - int index; // 分类结果的index - std::string label; // 分类结果的label - float prob; // 置信度 - - // 物体检测 或 图像分割时使用: - float x1, y1, x2, y2; // (x1, y1): 左上角, (x2, y2): 右下角; 均为0~1的长宽比例值。 - - // 图像分割时使用: - cv::Mat mask; // 0, 1 的mask - std::string mask_rle; // Run Length Encoding,游程编码的mask -}; -``` - -*** 关于矩形坐标 *** - -x1 * 图片宽度 = 检测框的左上角的横坐标 - -y1 * 图片高度 = 检测框的左上角的纵坐标 - -x2 * 图片宽度 = 检测框的右下角的横坐标 - -y2 * 图片高度 = 检测框的右下角的纵坐标 - -*** 关于图像分割mask *** - -``` -cv::Mat mask为图像掩码的二维数组 -{ - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 1, 1, 1, 0, 0, 0, 0}, - {0, 0, 0, 1, 1, 1, 0, 0, 0, 0}, - {0, 0, 0, 1, 1, 1, 0, 0, 0, 0}, - {0, 0, 0, 1, 1, 1, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, -} -其中1代表为目标区域,0代表非目标区域 -``` - -*** 关于图像分割mask_rle *** - -该字段返回了mask的游程编码,解析方式可参考 [http demo](https://github.com/Baidu-AIP/EasyDL-Segmentation-Demo) - -以上字段可以参考demo文件中使用opencv绘制的逻辑进行解析 - -### 3.2 预测视频 - -SDK 提供了支持摄像头读取、视频文件和网络视频流的解析工具类`VideoDecoding`,此类提供了获取视频帧数据的便利函数。通过`VideoConfig`结构体可以控制视频/摄像头的解析策略、抽帧策略、分辨率调整、结果视频存储等功能。对于抽取到的视频帧可以直接作为SDK infer 接口的参数进行预测。 - -* 接口输入 - -class`VideoDecoding`: - -``` - /** - * @brief 获取输入源的下一帧 - * @param frame_tensor - * @return - */ - virtual int next(FrameTensor &frame_tensor) = 0; - - /** - * @brief 显示当前frame_tensor中的视频帧 - * @param frame_tensor - * @return - */ - virtual int display(const FrameTensor &frame_tensor) = 0; - - /** - * @brief 将当前frame_tensor中的视频帧写为本地视频文件 - * @param frame_tensor - * @return - */ - 
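- // 补充说明:save 需配合下文 VideoConfig 中的 enable_save 与 save_path 使用(未开启时不会写出视频文件)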
virtual int save(FrameTensor &frame_tensor) = 0; - - /** - * @brief 获取视频的fps属性 - * @return - */ - virtual int get_fps() = 0; - /** - * @brief 获取视频的width属性 - * @return - */ - virtual int get_width() = 0; - - /** - * @brief 获取视频的height属性 - * @return - */ - virtual int get_height() = 0; -``` - -struct `VideoConfig` - -``` -/** - * @brief 视频源、抽帧策略、存储策略的设置选项 - */ -struct VideoConfig { - SourceType source_type; // 输入源类型 - std::string source_value; // 输入源地址,如视频文件路径、摄像头index、网络流地址 - int skip_frames{0}; // 设置跳帧,每隔skip_frames帧抽取一帧,并把该抽取帧的is_needed置为true - int retrieve_all{false}; // 是否抽取所有frame以便于作为显示和存储,对于不满足skip_frames策略的frame,把所抽取帧的is_needed置为false - int input_fps{0}; // 在采取抽帧之前设置视频的fps - Resolution resolution{Resolution::kAuto}; // 采样分辨率,只对camera有效 - - bool enable_display{false}; // 默认不支持。 - std::string window_name{"EasyEdge"}; - bool display_all{false}; // 是否显示所有frame,若为false,仅显示根据skip_frames抽取的frame - - bool enable_save{false}; - std::string save_path; // frame存储为视频文件的路径 - bool save_all{false}; // 是否存储所有frame,若为false,仅存储根据skip_frames抽取的frame - - std::map conf; -}; -``` - -| 序号 | 字段 | 含义 | -| --- | -------------- | ---------------------------------------------------------------------------------------------------------------------------------- | -| 1 | `source_type` | 输入源类型,支持视频文件、摄像头、网络视频流三种,值分别为1、2、3 | -| 2 | `source_value` | 若`source_type`为视频文件,该值为指向视频文件的完整路径;若`source_type`为摄像头,该值为摄像头的index,如对于`/dev/video0`的摄像头,则index为0;若`source_type`为网络视频流,则为该视频流的完整地址。 | -| 3 | `skip_frames` | 设置跳帧,每隔skip_frames帧抽取一帧,并把该抽取帧的is_needed置为true,标记为is_needed的帧是用来做预测的帧。反之,直接跳过该帧,不经过预测。 | -| 4 | `retrieve_all` | 若置该项为true,则无论是否设置跳帧,所有的帧都会被抽取返回,以作为显示或存储用。 | -| 5 | `input_fps` | 用于抽帧前设置fps | -| 6 | `resolution` | 设置摄像头采样的分辨率,其值请参考`easyedge_video.h`中的定义,注意该分辨率调整仅对输入源为摄像头时有效 | -| 7 | `conf` | 高级选项。部分配置会通过该map来设置 | - -*** 注意:*** - -1. `VideoConfig`不支持`display`功能。如果需要使用`VideoConfig`的`display`功能,需要自行编译带有GTK选项的OpenCV。 - -2. 使用摄像头抽帧时,如果通过`resolution`设置了分辨率调整,但是不起作用,请添加如下选项: - - ``` - video_config.conf["backend"] = "2"; - ``` - -3.部分设备上的CSI摄像头尚未兼容,如遇到问题,可以通过工单、QQ交流群或微信交流群反馈。 - -具体接口调用流程,可以参考SDK中的`demo_video_inference`。 - -# FAQ - -1. 如何处理一些 undefined reference / error while loading shared libraries? - - > 如:./easyedge_demo: error while loading shared libraries: libeasyedge.so.1: cannot open shared object file: No such file or directory - - 遇到该问题时,请找到具体的库的位置,设置LD_LIBRARY_PATH;或者安装缺少的库。 - - > 示例一:libverify.so.1: cannot open shared object file: No such file or directory - > 链接找不到libveirfy.so文件,一般可通过 export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:../../lib 解决(实际冒号后面添加的路径以libverify.so文件所在的路径为准) - - > 示例二:libopencv_videoio.so.4.5: cannot open shared object file: No such file or directory - > 链接找不到libopencv_videoio.so文件,一般可通过 export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:../../thirdparty/opencv/lib 解决(实际冒号后面添加的路径以libopencv_videoio.so所在路径为准) - - > 示例三:GLIBCXX_X.X.X not found - > 链接无法找到glibc版本,请确保系统gcc版本>=SDK的gcc版本。升级gcc/glibc可以百度搜索相关文献。 - -2. 运行二进制时,提示 libverify.so cannot open shared object file - - 可能cmake没有正确设置rpath, 可以设置LD_LIBRARY_PATH为sdk的lib文件夹后,再运行: - - ```bash - LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../lib ./easyedge_demo - ``` - -3. 
编译时报错:file format not recognized - - 可能是因为在复制SDK时文件信息丢失。请将整个压缩包复制到目标设备中,再解压缩、编译。 diff --git a/docs/Jetson-Linux-CPP-SDK-Serving.md b/docs/Jetson-Linux-CPP-SDK-Serving.md deleted file mode 100644 index 87b75ac098..0000000000 --- a/docs/Jetson-Linux-CPP-SDK-Serving.md +++ /dev/null @@ -1,293 +0,0 @@ -# 简介 - -本文档介绍FastDeploy中的模型SDK,在**Jetson Linux C++** 环境下:(1) **服务化**推理部署步骤,(2)介绍推理全流程API,方便开发者了解项目后二次开发。如果开发者对Jetson图像/视频部署感兴趣,可以参考[Jetson CPP Inference](./Jetson-Linux-CPP-SDK-Inference.md)文档。 - -**注意**:OCR目前不支持服务化推理部署。 - - - -* [简介](#简介) - -* [环境准备](#环境准备) - -* [快速开始](#快速开始) - - * [1. 项目结构说明](#1-项目结构说明) - * [2. 测试 HTTP Demo](#2-测试-http-demo) - * [2.1 启动HTTP预测服务](#21-启动http预测服务) - -* [HTTP API介绍](#http-api介绍) - - * [1. 开启http服务](#1-开启http服务) - * [2. 请求http服务](#2-请求http服务) - * [2.1 http 请求方式一:不使用图片base64格式](#21-http-请求方式一不使用图片base64格式) - * [2.2 http 请求方法二:使用图片base64格式](#22-http-请求方法二使用图片base64格式) - * [3. http 返回数据](#3-http-返回数据) - -* [FAQ](#faq) - - - -# 环境准备 - -* Jetpack: 4.6 。安装Jetpack 4.6,参考[NVIDIA 官网-Jetpack4.6安装指南](https://developer.nvidia.com/jetpack-sdk-46),或者参考采购的硬件厂商提供的安装方式进行安装。![]() - - | 序号 | 硬件 | Jetpack安装方式 | 下载链接 | ---- | - | --- | ---------------- | ------------------ | ------------------------------------------------------------------------------------------------------------------------------------------- | ---- | - | 1 | Jetson Xavier NX | SD Card Image | [Download SD Card Image](https://developer.nvidia.com/embedded/l4t/r32_release_v6.1/jetson_xavier_nx/jetson-nx-jp46-sd-card-image.zip) | ---- | - | 2 | Jetson Nano | SD Card Image | [Download SD Card Image](https://developer.nvidia.com/embedded/l4t/r32_release_v6.1/jeston_nano/jetson-nano-jp46-sd-card-image.zip) | ---- | - | 3 | Jetson Nano 2GB | SD Card Image | [Download SD Card Image](https://developer.nvidia.com/embedded/l4t/r32_release_v6.1/jeston_nano_2gb/jetson-nano-2gb-jp46-sd-card-image.zip) | ---- | - | 4 | agx xavier等 | NVIDIA SDK Manager | [Download NVIDIA SDK](https://developer.nvidia.com/nvsdk-manager) | ---- | - | 5 | 非官方版本,如emmc版 | 参考采购的硬件公司提供的安装指南 | ---- | ---- | - - 注意:本项目SDK要求 `CUDA=10.2`、`cuDNN=8.2`、`TensorRT=8.0`、`gcc>=7.5` 、`cmake 在 3.0以上` ,安装 Jetpack4.6系统包后,CUDA、cuDNN、TensorRT、gcc和cmake版本就已经满足要求,无需在进行安装。 - -# 快速开始 - -## 1. 项目结构说明 - -根据开发者模型、部署芯片、操作系统需要,在图像界面[飞桨开源模型](https://ai.baidu.com/easyedge/app/openSource)或[GIthub](https://github.com/PaddlePaddle/FastDeploy)中选择对应的SDK进行下载。解压后SDK目录结构如下: - -``` -.EasyEdge-Linux-硬件芯片 -├── RES # 模型资源文件夹,一套模型适配不同硬件、OS和部署方式 -│ ├── conf.json        # Android、iOS系统APP名字需要 -│ ├── model # 模型结构文件 -│ ├── params # 模型参数文件 -│ ├── label_list.txt # 模型标签文件 -│ ├── infer_cfg.json # 模型前后处理等配置文件 -├── ReadMe.txt -├── cpp # C++ SDK 文件结构 - └── baidu_easyedge_linux_cpp_x86_64_CPU.Generic_gcc5.4_v1.4.0_20220325.tar.gz - ├── ReadMe.txt - ├── bin # 可直接运行的二进制文件 - ├── include # 二次开发用的头文件 - ├── lib # 二次开发用的所依赖的库 - ├── src # 二次开发用的示例工程 - └── thirdparty # 第三方依赖 -``` - -## 2. 测试 HTTP Demo - -> 模型资源文件(即压缩包中的RES文件夹)默认已经打包在开发者下载的SDK包中,请先将tar包整体拷贝到具体运行的设备中,再解压缩使用。 - -SDK中已经包含预先编译的二进制,可直接运行。以下运行示例均是`cd cpp/bin`路径下执行的结果。 - -### 2.1 启动HTTP预测服务 - -``` -./easyedge_serving {模型RES文件夹路径} -``` - -启动后,日志中会显示如下设备IP和24401端口号信息: - -``` -HTTP is now serving at 0.0.0.0:24401 -``` - -此时,开发者可以打开浏览器,输入链接地址`http://0.0.0.0:24401`(这里的`设备IP和24401端口号`根据开发者电脑显示修改),选择图片来进行测试。 - -
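-若设备上已安装 curl,也可以在命令行快速验证服务(示意:假设服务运行在本机 24401 端口、当前目录下有测试图片 1.jpg;如下文 HTTP API 所述,POST Body 即为图片的二进制内容,threshold 为阈值过滤参数): - -```bash -# 将图片二进制作为 POST Body 发送,threshold 设为 0.1 -curl -X POST "http://127.0.0.1:24401/?threshold=0.1" --data-binary @1.jpg -```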
- -同时,可以调用HTTP接口来访问服务,具体参考下文的[二次开发](#10)接口说明。 - -# HTTP API介绍 - -本章节主要结合[2.1 HTTP Demo]()的API介绍,方便开发者学习并将运行库嵌入到开发者的程序当中,更详细的API请参考`include/easyedge/easyedge*.h`文件。http服务包含服务端和客户端,目前支持的能力包括以下几种方式,Demo中提供了不使用图片base格式的`方式一:浏览器请求的方式`,其他几种方式开发者根据个人需要,选择开发。 - -## 1. 开启http服务 - -http服务的启动可直接使用`bin/easyedge_serving`,或参考`src/demo_serving.cpp`文件修改相关逻辑 - -```cpp - /** - * @brief 开启一个简单的demo http服务。 - * 该方法会block直到收到sigint/sigterm。 - * http服务里,图片的解码运行在cpu之上,可能会降低推理速度。 - * @tparam ConfigT - * @param config - * @param host - * @param port - * @param service_id service_id user parameter, uri '/get/service_id' will respond this value with 'text/plain' - * @param instance_num 实例数量,根据内存/显存/时延要求调整 - * @return - */ - template - int start_http_server( - const ConfigT &config, - const std::string &host, - int port, - const std::string &service_id, - int instance_num = 1); -``` - -## 2. 请求http服务 - -> 开发者可以打开浏览器,`http://{设备ip}:24401`,选择图片来进行测试。 - -### 2.1 http 请求方式一:不使用图片base64格式 - -URL中的get参数: - -| 参数 | 说明 | 默认值 | -| --------- | --------- | ---------------- | -| threshold | 阈值过滤, 0~1 | 如不提供,则会使用模型的推荐阈值 | - -HTTP POST Body即为图片的二进制内容(无需base64, 无需json) - -Python请求示例 - -```Python -import requests - -with open('./1.jpg', 'rb') as f: - img = f.read() - result = requests.post( - 'http://127.0.0.1:24401/', - params={'threshold': 0.1}, - data=img).json() -``` - -### 2.2 http 请求方法二:使用图片base64格式 - -HTTP方法:POST -Header如下: - -| 参数 | 值 | -| ------------ | ---------------- | -| Content-Type | application/json | - -**Body请求填写**: - -* 分类网络: - body 中请求示例 - - ``` - { - "image": "" - "top_num": 5 - } - ``` - - body中参数详情 - -| 参数 | 是否必选 | 类型 | 可选值范围 | 说明 | -| ------- | ---- | ------ | ----- | ----------------------------------------------------------------------------------- | -| image | 是 | string | - | 图像数据,base64编码,要求base64图片编码后大小不超过4M,最短边至少15px,最长边最大4096px,支持jpg/png/bmp格式 **注意去掉头部** | -| top_num | 否 | number | - | 返回分类数量,不填该参数,则默认返回全部分类结果 | - -* 检测和分割网络: - Body请求示例: - - ``` - { - "image": "" - } - ``` - - body中参数详情: - -| 参数 | 是否必选 | 类型 | 可选值范围 | 说明 | -| --------- | ---- | ------ | ----- | ----------------------------------------------------------------------------------- | -| image | 是 | string | - | 图像数据,base64编码,要求base64图片编码后大小不超过4M,最短边至少15px,最长边最大4096px,支持jpg/png/bmp格式 **注意去掉头部** | -| threshold | 否 | number | - | 默认为推荐阈值,也可自行根据需要进行设置 | - -Python请求示例: - -```Python -import base64 -import requests -def main(): - with open("图像路径", 'rb') as f: - result = requests.post("http://{服务ip地址}:24401/", json={ - "image": base64.b64encode(f.read()).decode("utf8") - }) - # print(result.request.body) - # print(result.request.headers) - print(result.content) - -if __name__ == '__main__': - main() -``` - -## 3. http 返回数据 - -| 字段 | 类型说明 | 其他 | -| ---------- | ------ | ------------------------------------ | -| error_code | Number | 0为成功,非0参考message获得具体错误信息 | -| results | Array | 内容为具体的识别结果。其中字段的具体含义请参考`预测图像-返回格式`一节 | -| cost_ms | Number | 预测耗时ms,不含网络交互时间 | - -返回示例 - -```json -{ - "cost_ms": 52, - "error_code": 0, - "results": [ - { - "confidence": 0.94482421875, - "index": 1, - "label": "IronMan", - "x1": 0.059185408055782318, - "x2": 0.18795496225357056, - "y1": 0.14762254059314728, - "y2": 0.52510076761245728, - "mask": "...", // 图像分割模型字段 - "trackId": 0, // 目标追踪模型字段 - }, - - ] -} -``` - -*** 关于矩形坐标 *** - -x1 * 图片宽度 = 检测框的左上角的横坐标 - -y1 * 图片高度 = 检测框的左上角的纵坐标 - -x2 * 图片宽度 = 检测框的右下角的横坐标 - -y2 * 图片高度 = 检测框的右下角的纵坐标 - -*** 关于分割模型 *** - -其中,mask为分割模型的游程编码,解析方式可参考 [http demo](https://github.com/Baidu-AIP/EasyDL-Segmentation-Demo) - -# FAQ - -1. 
如何处理一些 undefined reference / error while loading shared libraries? - -> 如:./easyedge_demo: error while loading shared libraries: libeasyedge.so.1: cannot open shared object file: No such file or directory - -遇到该问题时,请找到具体的库的位置,设置LD_LIBRARY_PATH;或者安装缺少的库。 - -> 示例一:libverify.so.1: cannot open shared object file: No such file or directory -> 链接找不到libveirfy.so文件,一般可通过 export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:../../lib 解决(实际冒号后面添加的路径以libverify.so文件所在的路径为准) - -> 示例二:libopencv_videoio.so.4.5: cannot open shared object file: No such file or directory -> 链接找不到libopencv_videoio.so文件,一般可通过 export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:../../thirdparty/opencv/lib 解决(实际冒号后面添加的路径以libopencv_videoio.so所在路径为准) - -> 示例三:GLIBCXX_X.X.X not found -> 链接无法找到glibc版本,请确保系统gcc版本>=SDK的gcc版本。升级gcc/glibc可以百度搜索相关文献。 - -2. 使用libcurl请求http服务时,速度明显变慢 - -这是因为libcurl请求continue导致server等待数据的问题,添加空的header即可 - -```bash -headers = curl_slist_append(headers, "Expect:"); -``` - -3. 运行二进制时,提示 libverify.so cannot open shared object file - -可能cmake没有正确设置rpath, 可以设置LD_LIBRARY_PATH为sdk的lib文件夹后,再运行: - -```bash -LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../lib ./easyedge_demo -``` - -4. 编译时报错:file format not recognized - -可能是因为在复制SDK时文件信息丢失。请将整个压缩包复制到目标设备中,再解压缩、编译。 diff --git a/docs/Linux-CPP-SDK-Inference.md b/docs/Linux-CPP-SDK-Inference.md deleted file mode 100644 index c8e4eb200a..0000000000 --- a/docs/Linux-CPP-SDK-Inference.md +++ /dev/null @@ -1,412 +0,0 @@ -# 简介 - -本文档介绍FastDeploy中的模型SDK,在 **Intel X86-CPU/ NVIDIA GPU、Linux** 操作系统下的C++ :(1)图像和视频的推理部署步骤,(2)介绍推理全流程API,方便了解项目后二次开发。如果对Linux操作系统下的 Python部署感兴趣,请参考[Linux Python环境下的推理部署](./Linux-Python-SDK-Inference.md)文档。 - - - -* [简介](#简介) - -* [环境准备](#环境准备) - - * [1. 硬件支持](#1-硬件支持) - * [2. 软件环境](#2-软件环境) - -* [快速开始](#快速开始) - - * [1. 项目结构说明](#1-项目结构说明) - * [2. 测试Demo](#2-测试demo) - * [2.1. 预测图像](#21-预测图像) - * [2.2. 预测视频流](#22-预测视频流) - * [3. 编译Demo](#3-编译demo) - -* [预测API流程详解](#预测api流程详解) - - * [1. SDK参数运行配置](#1-sdk参数运行配置) - * [2. 初始化Predictor](#2-初始化predictor) - * [3. 预测推理](#3-预测推理) - * [3.1 预测图像](#31-预测图像) - * [3.2 预测视频](#32-预测视频) - -* [FAQ](#faq) - - - -# 环境准备 - -## 1.硬件支持 - -* NVIDIA GPU: x86_64 - * cuda支持版本:CUDA10.0/10.1/10.2 + cuDNN 7 (cuDNN版本>=7.6.5) - * cuda支持版本:CUDA11.0 + cuDNN v8.0.4 -* CPU:Intel x86_64 - -## 2. 软件环境 - -1.运行二进制文件-环境要求 - -* gcc: 5.4 以上 (GLIBCXX_3.4.22) - * Linux下查看gcc版本命名(可能因系统差异命令会不同):`gcc --version`; - * Linux下C++基础库GLIBCXX的命令(可能因系统差异路径会有不同,可检测自己环境下的情况):`strings /usr/lib64/libstdc++.so.6 | grep GLIBCXX` -* glibc:2.23以上 - * Linux查看命令:`ldd --version` - -2.二次开发编译-环境要求 - -编译源代码时,除了gcc、GLIBCXX、glibc满足`1.运行二进制文件-环境要求`外,还需要cmake满足要求。 - -* cmake: 3.0 以上 - - * Linux查看命令:`cmake --version` - -# 快速开始 - -## 1. 项目结构说明 - -根据开发者模型、部署芯片、操作系统需要,在图像界面[飞桨开源模型](https://ai.baidu.com/easyedge/app/openSource)或[GIthub](https://github.com/PaddlePaddle/FastDeploy)中选择对应的SDK进行下载。SDK目录结构如下: - -``` -.EasyEdge-Linux-硬件芯片 -├── RES # 模型资源文件夹,一套模型适配不同硬件、OS和部署方式 -│ ├── conf.json        # Android、iOS系统APP名字需要 -│ ├── model # 模型结构文件 -│ ├── params # 模型参数文件 -│ ├── label_list.txt # 模型标签文件 -│ ├── infer_cfg.json # 模型前后处理等配置文件 -├── ReadMe.txt -├── cpp # C++ SDK 文件结构 - └── baidu_easyedge_linux_cpp_x86_64_CPU.Generic_gcc5.4_v1.4.0_20220325.tar.gz - ├── ReadMe.txt - ├── bin # 可直接运行的二进制文件 - ├── include # 二次开发用的头文件 - ├── lib # 二次开发用的所依赖的库 - ├── src # 二次开发用的示例工程 - └── thirdparty # 第三方依赖 -└── python # Python SDK 文件 -``` - -## 2. 测试Demo - -**注意**: OCR算法目前没有提供 - -> 模型资源文件(即压缩包中的RES文件夹)默认已经打包在开发者下载的SDK包中,请先将tar包整体拷贝到具体运行的设备中,再解压缩使用。 - -SDK中已经包含预先编译的二进制,可直接运行。以下运行示例均是`cd cpp/bin`路径下执行的结果。 - -### 2.1. 
预测图像 - -```bash -./easyedge_image_inference {模型RES文件夹路径} {测试图片路径} -``` - -运行效果示例: - -
- -```bash - > ./easyedge_image_inference ../../../../RES 2.jpeg -2019-02-13 16:46:12,659 INFO [EasyEdge] [easyedge.cpp:34] 140606189016192 Baidu EasyEdge Linux Development Kit 0.2.1(20190213) -2019-02-13 16:46:14,083 INFO [EasyEdge] [paddlev2_edge_predictor.cpp:60] 140606189016192 Allocate graph success. -2019-02-13 16:46:14,326 DEBUG [EasyEdge] [paddlev2_edge_predictor.cpp:143] 140606189016192 Inference costs 168 ms -1, 1:txt_frame, p:0.994905 loc: 0.168161, 0.153654, 0.920856, 0.779621 -Done -``` - -### 2.2. 预测视频流 - -``` -./easyedge_video_inference {模型RES文件夹路径} {video_type} {video_src_path} -``` - -其中 video_type 支持三种: - -``` - video_type : 1 // 本地视频文件 - video_type : 2 // 摄像头的index - video_type : 3 // 网络视频流 -``` - -video_src_path: 为 video_type 数值所对应的本地视频路径 、本地摄像头id、网络视频流地址,如: - -``` - 本地视频文件: ./easyedge_video_inference {模型RES文件夹路径} 1 ~/my_video_file.mp4 - 本地摄像头: ./easyedge_video_inference {模型RES文件夹路径} 2 1 #/dev/video1 - 网络视频流: ./easyedge_video_inference {模型RES文件夹路径} 3 rtmp://192.168.x.x:8733/live/src -``` - -注:以上路径是假模拟路径,开发者需要根据自己实际图像/视频,准备测试图像,并填写正确的测试路径。 - -## 3. 编译Demo - -通过[项目结构说明](#3)了解到,`bin`路径下的可执行文件 由`src`下的对应文件编译得到。 通过以下命令,即可完成`src`下的源码编译。 - -``` -cd src -mkdir build && cd build -cmake .. && make -``` - -至此,会在build文件夹下生成编译好的可执行文件,如图像推理的二进制文件:`build/demo_image_inference/easyedge_image_inference`。 - -# 预测API流程详解 - -本章节主要结合[2.测试Demo](#4)的Demo示例介绍推理API,方便开发者学习并将运行库嵌入到开发者的程序当中,更详细的API请参考`include/easyedge/easyedge*.h`文件。图像、视频的推理包含以下3个API,查看下面的cpp代码中的step注释说明。 - -> ❗注意:
-> (1)`src`文件夹中包含完整可编译的cmake工程实例,建议开发者先行了解[cmake工程基本知识](https://cmake.org/cmake/help/latest/guide/tutorial/index.html)。
-> (2)请优先参考SDK中自带的Demo工程的使用流程和说明。遇到错误,请优先参考文件中的注释、解释、日志说明。 - -```cpp - // step 1: SDK配置运行参数 - EdgePredictorConfig config; - config.model_dir = {模型文件目录}; - - // step 2: 创建并初始化Predictor;这这里选择合适的引擎 - auto predictor = global_controller()->CreateEdgePredictor(config); - - // step 3-1: 预测图像 - auto img = cv::imread({图片路径}); - std::vector results; - predictor->infer(img, results); - - // step 3-2: 预测视频 - std::vector results; - FrameTensor frame_tensor; - VideoConfig video_config; - video_config.source_type = static_cast(video_type); // source_type 定义参考头文件 easyedge_video.h - video_config.source_value = video_src; - /* - ... more video_configs, 根据需要配置video_config的各选项 - */ - auto video_decoding = CreateVideoDecoding(video_config); - while (video_decoding->next(frame_tensor) == EDGE_OK) { - results.clear(); - if (frame_tensor.is_needed) { - predictor->infer(frame_tensor.frame, results); - render(frame_tensor.frame, results, predictor->model_info().kind); - } - //video_decoding->display(frame_tensor); // 显示当前frame,需在video_config中开启配置 - //video_decoding->save(frame_tensor); // 存储当前frame到视频,需在video_config中开启配置 - } -``` - -若需自定义library search path或者gcc路径,修改对应Demo工程下的CMakeList.txt即可。 - -## 1. SDK参数运行配置 - -SDK的参数通过`EdgePredictorConfig::set_config`和`global_controller()->set_config`配置。本Demo 中设置了模型路径,其他参数保留默认参数。更详细的支持运行参数等,可以参考开发工具包中的头文件(`include/easyedge/easyedge_xxxx_config.h`)的详细说明。 - -配置参数使用方法如下: - -``` -EdgePredictorConfig config; -config.model_dir = {模型文件目录}; -``` - -## 2. 初始化Predictor - -* 接口 - - ```cpp - auto predictor = global_controller()->CreateEdgePredictor(config); - predictor->init(); - ``` - -若返回非0,请查看输出日志排查错误原因。 - -## 3. 预测推理 - -### 3.1 预测图像 - -> 在Demo中展示了预测接口infer()传入cv::Mat& image图像内容,并将推理结果赋值给std::vector& result。更多关于infer()的使用,可以根据参考`easyedge.h`头文件中的实际情况、参数说明自行传入需要的内容做推理 - -* 接口输入 - -```cpp - /** - * @brief - * 通用接口 - * @param image: must be BGR , HWC format (opencv default) - * @param result - * @return - */ - virtual int infer(cv::Mat& image, std::vector& result) = 0; -``` - - 图片的格式务必为opencv默认的BGR, HWC格式。 - -* 接口返回 - - `EdgeResultData`中可以获取对应的分类信息、位置信息。 - -```cpp -struct EdgeResultData { - int index; // 分类结果的index - std::string label; // 分类结果的label - float prob; // 置信度 - - // 物体检测 或 图像分割时使用: - float x1, y1, x2, y2; // (x1, y1): 左上角, (x2, y2): 右下角; 均为0~1的长宽比例值。 - - // 图像分割时使用: - cv::Mat mask; // 0, 1 的mask - std::string mask_rle; // Run Length Encoding,游程编码的mask -}; -``` - -*** 关于矩形坐标 *** - -x1 * 图片宽度 = 检测框的左上角的横坐标 - -y1 * 图片高度 = 检测框的左上角的纵坐标 - -x2 * 图片宽度 = 检测框的右下角的横坐标 - -y2 * 图片高度 = 检测框的右下角的纵坐标 - -*** 关于图像分割mask *** - -``` -cv::Mat mask为图像掩码的二维数组 -{ - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 1, 1, 1, 0, 0, 0, 0}, - {0, 0, 0, 1, 1, 1, 0, 0, 0, 0}, - {0, 0, 0, 1, 1, 1, 0, 0, 0, 0}, - {0, 0, 0, 1, 1, 1, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, -} -其中1代表为目标区域,0代表非目标区域 -``` - -*** 关于图像分割mask_rle *** - -该字段返回了mask的游程编码,解析方式可参考 [http demo](https://github.com/Baidu-AIP/EasyDL-Segmentation-Demo) - -以上字段可以参考demo文件中使用opencv绘制的逻辑进行解析 - -### 3.2 预测视频 - -SDK 提供了支持摄像头读取、视频文件和网络视频流的解析工具类`VideoDecoding`,此类提供了获取视频帧数据的便利函数。通过`VideoConfig`结构体可以控制视频/摄像头的解析策略、抽帧策略、分辨率调整、结果视频存储等功能。对于抽取到的视频帧可以直接作为SDK infer 接口的参数进行预测。 - -* 接口输入 - -class`VideoDecoding`: - -``` - /** - * @brief 获取输入源的下一帧 - * @param frame_tensor - * @return - */ - virtual int next(FrameTensor &frame_tensor) = 0; - - /** - * @brief 显示当前frame_tensor中的视频帧 - * @param frame_tensor - * @return - */ - virtual int display(const FrameTensor &frame_tensor) = 0; - - /** - * @brief 将当前frame_tensor中的视频帧写为本地视频文件 - * @param frame_tensor - * @return - */ - 
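- // 补充说明:save 需配合下文 VideoConfig 中的 enable_save 与 save_path 使用(未开启时不会写出视频文件)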
virtual int save(FrameTensor &frame_tensor) = 0; - - /** - * @brief 获取视频的fps属性 - * @return - */ - virtual int get_fps() = 0; - /** - * @brief 获取视频的width属性 - * @return - */ - virtual int get_width() = 0; - - /** - * @brief 获取视频的height属性 - * @return - */ - virtual int get_height() = 0; -``` - -struct `VideoConfig` - -``` -/** - * @brief 视频源、抽帧策略、存储策略的设置选项 - */ -struct VideoConfig { - SourceType source_type; // 输入源类型 - std::string source_value; // 输入源地址,如视频文件路径、摄像头index、网络流地址 - int skip_frames{0}; // 设置跳帧,每隔skip_frames帧抽取一帧,并把该抽取帧的is_needed置为true - int retrieve_all{false}; // 是否抽取所有frame以便于作为显示和存储,对于不满足skip_frames策略的frame,把所抽取帧的is_needed置为false - int input_fps{0}; // 在采取抽帧之前设置视频的fps - Resolution resolution{Resolution::kAuto}; // 采样分辨率,只对camera有效 - - bool enable_display{false}; // 默认不支持。 - std::string window_name{"EasyEdge"}; - bool display_all{false}; // 是否显示所有frame,若为false,仅显示根据skip_frames抽取的frame - - bool enable_save{false}; - std::string save_path; // frame存储为视频文件的路径 - bool save_all{false}; // 是否存储所有frame,若为false,仅存储根据skip_frames抽取的frame - - std::map conf; -}; -``` - -| 序号 | 字段 | 含义 | -| --- | -------------- | ---------------------------------------------------------------------------------------------------------------------------------- | -| 1 | `source_type` | 输入源类型,支持视频文件、摄像头、网络视频流三种,值分别为1、2、3 | -| 2 | `source_value` | 若`source_type`为视频文件,该值为指向视频文件的完整路径;若`source_type`为摄像头,该值为摄像头的index,如对于`/dev/video0`的摄像头,则index为0;若`source_type`为网络视频流,则为该视频流的完整地址。 | -| 3 | `skip_frames` | 设置跳帧,每隔skip_frames帧抽取一帧,并把该抽取帧的is_needed置为true,标记为is_needed的帧是用来做预测的帧。反之,直接跳过该帧,不经过预测。 | -| 4 | `retrieve_all` | 若置该项为true,则无论是否设置跳帧,所有的帧都会被抽取返回,以作为显示或存储用。 | -| 5 | `input_fps` | 用于抽帧前设置fps | -| 6 | `resolution` | 设置摄像头采样的分辨率,其值请参考`easyedge_video.h`中的定义,注意该分辨率调整仅对输入源为摄像头时有效 | -| 7 | `conf` | 高级选项。部分配置会通过该map来设置 | - -*** 注意:*** - -1. `VideoConfig`不支持`display`功能。如果需要使用`VideoConfig`的`display`功能,需要自行编译带有GTK选项的OpenCV。 - -2. 使用摄像头抽帧时,如果通过`resolution`设置了分辨率调整,但是不起作用,请添加如下选项: - - ``` - video_config.conf["backend"] = "2"; - ``` - -3.部分设备上的CSI摄像头尚未兼容,如遇到问题,可以通过工单、QQ交流群或微信交流群反馈。 - -具体接口调用流程,可以参考SDK中的`demo_video_inference`。 - -# FAQ - -1. 如何处理一些 undefined reference / error while loading shared libraries? - - > 如:./easyedge_demo: error while loading shared libraries: libeasyedge.so.1: cannot open shared object file: No such file or directory - - 遇到该问题时,请找到具体的库的位置,设置LD_LIBRARY_PATH;或者安装缺少的库。 - - > 示例一:libverify.so.1: cannot open shared object file: No such file or directory - > 链接找不到libveirfy.so文件,一般可通过 export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:../../lib 解决(实际冒号后面添加的路径以libverify.so文件所在的路径为准) - - > 示例二:libopencv_videoio.so.4.5: cannot open shared object file: No such file or directory - > 链接找不到libopencv_videoio.so文件,一般可通过 export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:../../thirdparty/opencv/lib 解决(实际冒号后面添加的路径以libopencv_videoio.so所在路径为准) - - > 示例三:GLIBCXX_X.X.X not found - > 链接无法找到glibc版本,请确保系统gcc版本>=SDK的gcc版本。升级gcc/glibc可以百度搜索相关文献。 - -2. 运行二进制时,提示 libverify.so cannot open shared object file - - 可能cmake没有正确设置rpath, 可以设置LD_LIBRARY_PATH为sdk的lib文件夹后,再运行: - - ```bash - LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../lib ./easyedge_demo - ``` - - 3. 
编译时报错:file format not recognized - - 可能是因为在复制SDK时文件信息丢失。请将整个压缩包复制到目标设备中,再解压缩、编译。 diff --git a/docs/Linux-CPP-SDK-Serving.md b/docs/Linux-CPP-SDK-Serving.md deleted file mode 100644 index 2e585fe541..0000000000 --- a/docs/Linux-CPP-SDK-Serving.md +++ /dev/null @@ -1,329 +0,0 @@ -# 简介 - -本文档介绍FastDeploy中的模型SDK,在**X86 CPU/ NVIDIA GPU、Linux操作系统** 的C++环境:(1)HTTP服务化推理部署步骤,(2)介绍推理全流程API,方便开发者了解项目后二次开发。 -如果开发者对Python语言的相关能力感兴趣,可以参考Linux Python请参考[Linux Python环境下的推理部署](./Linux-Python-SDK-Serving.md)文档。 - -**【注意】**:OCR Demo 暂不支持服务化部署。 - - - -* [简介](#简介) - -* [安装准备](#安装准备) - - * [1. 硬件支持](#1-硬件支持) - * [2. 软件环境](#2-软件环境) - -* [快速开始](#快速开始) - - * [1. 项目结构说明](#1-项目结构说明) - * [2. 测试 HTTP Demo](#2-测试-http-demo) - * [2.1 启动HTTP预测服务](#21-启动http预测服务) - * [3. 编译Demo](#3-编译demo) - -* [HTTP API流程详解](#http-api流程详解) - - * [1. 开启http服务](#1-开启http服务) - * [2. 请求http服务](#2-请求http服务) - * [2.1 http 请求方式一:不使用图片base64格式](#21-http-请求方式一不使用图片base64格式) - * [2.2 http 请求方法二:使用图片base64格式](#22-http-请求方法二使用图片base64格式) - * [3. http返回数据](#3-http返回数据) - -* [FAQ](#faq) - - - -# 安装准备 - -## 1.硬件支持 - -- NVIDIA GPU: x86_64 - - cuda支持版本:CUDA10.0/10.1/10.2 + cuDNN 7 (cuDNN版本>=7.6.5) - - cuda支持版本:CUDA11.0 + cuDNN v8.0.4 -- CPU:Intel x86_64 - -## 2. 软件环境 - -1.运行二进制文件-环境要求 - -- gcc: 5.4 以上 (GLIBCXX_3.4.22) - - Linux下查看gcc版本命名(可能因系统差异命令会不同):`gcc --version`; - - Linux下C++基础库GLIBCXX的命令(可能因系统差异路径会有不同,可检测自己环境下的情况):`strings /usr/lib64/libstdc++.so.6 | grep GLIBCXX` -- glibc:2.23以上 - - Linux查看命令:`ldd --version` - -2.二次开发编译-环境要求 - -编译源代码时,除了gcc、GLIBCXX、glibc满足`1.运行二进制文件-环境要求`外,还需要cmake满足要求。 - -- cmake: 3.0 以上 - - - Linux查看命令:`cmake --version` - -# 快速开始 - -## 1. 项目结构说明 - -根据开发者模型、部署芯片、操作系统需要,在图像界面[飞桨开源模型](https://ai.baidu.com/easyedge/app/openSource)或[GIthub](https://github.com/PaddlePaddle/FastDeploy)中选择对应的SDK进行下载。SDK目录结构如下: - -``` -.EasyEdge-Linux-硬件芯片 -├── RES # 模型资源文件夹,一套模型适配不同硬件、OS和部署方式 -│ ├── conf.json        # Android、iOS系统APP名字需要 -│ ├── model # 模型结构文件 -│ ├── params # 模型参数文件 -│ ├── label_list.txt # 模型标签文件 -│ ├── infer_cfg.json # 模型前后处理等配置文件 -├── ReadMe.txt -├── cpp # C++ SDK 文件结构 - └── baidu_easyedge_linux_cpp_x86_64_CPU.Generic_gcc5.4_v1.4.0_20220325.tar.gz - ├── ReadMe.txt - ├── bin # 可直接运行的二进制文件 - ├── include # 二次开发用的头文件 - ├── lib # 二次开发用的所依赖的库 - ├── src # 二次开发用的示例工程 - └── thirdparty # 第三方依赖 -└── python # Python SDK 文件 -``` - -``` - -``` - -## 2. 测试 HTTP Demo - -> 模型资源文件(即压缩包中的RES文件夹)默认已经打包在开发者下载的SDK包中,请先将tar包整体拷贝到具体运行的设备中,再解压缩使用。 - -SDK中已经包含预先编译的二进制,可直接运行。以下运行示例均是`cd cpp/bin`路径下执行的结果。 - -### 2.1. 启动HTTP预测服务 - -``` -./easyedge_serving {模型RES文件夹路径} -``` - -启动后,日志中会显示如下设备IP和24401端口号信息: - -``` -HTTP is now serving at 0.0.0.0:24401 -``` - -此时,开发者可以打开浏览器,输入链接地址`http://0.0.0.0:24401`(这里的`设备IP和24401端口号`根据开发者电脑显示修改),选择图片来进行测试。 - -
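-进入下文 API 说明之前,也可以先在命令行快速验证服务并查看返回的 JSON(示意:假设本机已安装 curl 与 python3,服务监听 24401 端口;返回字段含义见下文`http返回数据`一节): - -```bash -# POST 图片二进制并格式化打印返回的 JSON -curl -s --data-binary @1.jpg "http://127.0.0.1:24401/?threshold=0.1" | python3 -m json.tool -```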
- -同时,可以调用HTTP接口来访问服务,具体参考下文的[二次开发](#10)接口说明。 - -## 3. 编译Demo - -通过[项目结构说明](#3)了解到,`bin`路径下的可执行文件是由`src`下的对应文件编译得到的。 该部分说明C++编译命令。 - -``` -cd src -mkdir build && cd build -cmake .. && make -``` - -至此,会在build文件夹下生成编译好的可执行文件,如图像推理的二进制文件:`build/demo_serving/easyedge_serving`。 - -# HTTP API流程详解 - -本章节主要结合[2.1 HTTP Demo](#4)的API介绍,方便开发者学习并将运行库嵌入到开发者的程序当中,更详细的API请参考`include/easyedge/easyedge*.h`文件。http服务包含服务端和客户端,目前支持的能力包括以下几种方式,Demo中提供了不使用图片base格式的`方式一:浏览器请求的方式`,其他几种方式开发者根据个人需要,选择开发。 - -## 1. 开启http服务 - -http服务的启动可直接使用`bin/easyedge_serving`,或参考`src/demo_serving.cpp`文件修改相关逻辑 - -```cpp - /** - * @brief 开启一个简单的demo http服务。 - * 该方法会block直到收到sigint/sigterm。 - * http服务里,图片的解码运行在cpu之上,可能会降低推理速度。 - * @tparam ConfigT - * @param config - * @param host - * @param port - * @param service_id service_id user parameter, uri '/get/service_id' will respond this value with 'text/plain' - * @param instance_num 实例数量,根据内存/显存/时延要求调整 - * @return - */ - template - int start_http_server( - const ConfigT &config, - const std::string &host, - int port, - const std::string &service_id, - int instance_num = 1); -``` - -## 2. 请求http服务 - -> 开发者可以打开浏览器,`http://{设备ip}:24401`,选择图片来进行测试。 - -### 2.1 http 请求方式一:不使用图片base64格式 - -URL中的get参数: - -| 参数 | 说明 | 默认值 | -| --------- | --------- | ---------------- | -| threshold | 阈值过滤, 0~1 | 如不提供,则会使用模型的推荐阈值 | - -HTTP POST Body即为图片的二进制内容(无需base64, 无需json) - -Python请求示例 - -```Python -import requests - -with open('./1.jpg', 'rb') as f: - img = f.read() - result = requests.post( - 'http://127.0.0.1:24401/', - params={'threshold': 0.1}, - data=img).json() -``` - -### 2.2 http 请求方法二:使用图片base64格式 - -HTTP方法:POST -Header如下: - -| 参数 | 值 | -| ------------ | ---------------- | -| Content-Type | application/json | - -**Body请求填写**: - -* 分类网络: - body 中请求示例 - - ``` - { - "image": "" - "top_num": 5 - } - ``` - - body中参数详情 - -| 参数 | 是否必选 | 类型 | 可选值范围 | 说明 | -| ------- | ---- | ------ | ----- | ----------------------------------------------------------------------------------- | -| image | 是 | string | - | 图像数据,base64编码,要求base64图片编码后大小不超过4M,最短边至少15px,最长边最大4096px,支持jpg/png/bmp格式 **注意去掉头部** | -| top_num | 否 | number | - | 返回分类数量,不填该参数,则默认返回全部分类结果 | - -* 检测和分割网络: - Body请求示例: - - ``` - { - "image": "" - } - ``` - - body中参数详情: - -| 参数 | 是否必选 | 类型 | 可选值范围 | 说明 | -| --------- | ---- | ------ | ----- | ----------------------------------------------------------------------------------- | -| image | 是 | string | - | 图像数据,base64编码,要求base64图片编码后大小不超过4M,最短边至少15px,最长边最大4096px,支持jpg/png/bmp格式 **注意去掉头部** | -| threshold | 否 | number | - | 默认为推荐阈值,也可自行根据需要进行设置 | - -Python请求示例 - -```python -import base64 -import requests - -def main(): - with open("图像路径", 'rb') as f: - result = requests.post("http://{服务ip地址}:24401/", json={ - "image": base64.b64encode(f.read()).decode("utf8") - }) - # print(result.request.body) - # print(result.request.headers) - print(result.content) - -if __name__ == '__main__': - main() -``` - -### 3. 
http返回数据 - -| 字段 | 类型说明 | 其他 | -| ---------- | ------ | ------------------------------------ | -| error_code | Number | 0为成功,非0参考message获得具体错误信息 | -| results | Array | 内容为具体的识别结果。其中字段的具体含义请参考`预测图像-返回格式`一节 | -| cost_ms | Number | 预测耗时ms,不含网络交互时间 | - -返回示例 - -```json -{ - "cost_ms": 52, - "error_code": 0, - "results": [ - { - "confidence": 0.94482421875, - "index": 1, - "label": "IronMan", - "x1": 0.059185408055782318, - "x2": 0.18795496225357056, - "y1": 0.14762254059314728, - "y2": 0.52510076761245728, - "mask": "...", // 图像分割模型字段 - "trackId": 0, // 目标追踪模型字段 - }, - - ] -} -``` - -*** 关于矩形坐标 *** - -x1 * 图片宽度 = 检测框的左上角的横坐标 - -y1 * 图片高度 = 检测框的左上角的纵坐标 - -x2 * 图片宽度 = 检测框的右下角的横坐标 - -y2 * 图片高度 = 检测框的右下角的纵坐标 - -*** 关于分割模型 *** - -其中,mask为分割模型的游程编码,解析方式可参考 [http demo](https://github.com/Baidu-AIP/EasyDL-Segmentation-Demo) - -# FAQ - -1. 如何处理一些 undefined reference / error while loading shared libraries? - -> 如:./easyedge_demo: error while loading shared libraries: libeasyedge.so.1: cannot open shared object file: No such file or directory - -遇到该问题时,请找到具体的库的位置,设置LD_LIBRARY_PATH;或者安装缺少的库。 - -> 示例一:libverify.so.1: cannot open shared object file: No such file or directory -> 链接找不到libveirfy.so文件,一般可通过 export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:../../lib 解决(实际冒号后面添加的路径以libverify.so文件所在的路径为准) - -> 示例二:libopencv_videoio.so.4.5: cannot open shared object file: No such file or directory -> 链接找不到libopencv_videoio.so文件,一般可通过 export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:../../thirdparty/opencv/lib 解决(实际冒号后面添加的路径以libopencv_videoio.so所在路径为准) - -> 示例三:GLIBCXX_X.X.X not found -> 链接无法找到glibc版本,请确保系统gcc版本>=SDK的gcc版本。升级gcc/glibc可以百度搜索相关文献。 - -2. 使用libcurl请求http服务时,速度明显变慢 - -这是因为libcurl请求continue导致server等待数据的问题,添加空的header即可 - -```bash -headers = curl_slist_append(headers, "Expect:"); -``` - -3. 运行二进制时,提示 libverify.so cannot open shared object file - -可能cmake没有正确设置rpath, 可以设置LD_LIBRARY_PATH为sdk的lib文件夹后,再运行: - -```bash -LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../lib ./easyedge_demo -``` - -4. 编译时报错:file format not recognized - -可能是因为在复制SDK时文件信息丢失。请将整个压缩包复制到目标设备中,再解压缩、编译。 diff --git a/docs/Linux-Python-SDK-Inference.md b/docs/Linux-Python-SDK-Inference.md deleted file mode 100644 index 98dbc3a247..0000000000 --- a/docs/Linux-Python-SDK-Inference.md +++ /dev/null @@ -1,369 +0,0 @@ -# 简介 - -本文档介绍FastDeploy中的模型SDK,在**Intel x86_64 / NVIDIA GPU Linux Python** 环境下: (1)图像推理部署步骤; (2)介绍模型推流全流程API,方便开发者了解项目后二次开发。 -其中Linux C++请参考[Linux CPP环境下的推理部署](./Linux-CPP-SDK-Inference.md)文档。 - - - -* [简介](#简介) - -* [环境准备](#环境准备) - - * [1. SDK下载](#1-sdk下载) - * [2. Python环境](#2-python环境) - * [3. 安装依赖](#3-安装依赖) - * [3.1 安装paddlepaddle](#31-安装paddlepaddle) - * [3.2 安装EasyEdge Python Wheel 包](#32-安装easyedge-python-wheel-包) - -* [快速开始](#快速开始) - - * [1. 文件结构说明](#1-文件结构说明) - * [2. 测试Demo](#2-测试demo) - * [2.1 预测图像](#21-预测图像) - -* [预测API流程详解](#预测api流程详解) - - * [1. 基础流程](#1-基础流程) - * [2. 初始化](#2-初始化) - * [3. SDK参数配置](#3-sdk参数配置) - * [4. 预测图像](#4-预测图像) - -* [FAQ](#faq) - - - -# 环境准备 - -## 1. SDK下载 - -根据开发者模型、部署芯片、操作系统需要,在图像界面[飞桨开源模型](https://ai.baidu.com/easyedge/app/openSource)或[GIthub](https://github.com/PaddlePaddle/FastDeploy)中选择对应的SDK进行下载。解压后SDK目录结构如下: - -```shell -EasyEdge-Linux-x86-[部署芯片] -├── RES # 模型文件资源文件夹,可替换为其他模型 -├── README.md -├── cpp # C++ SDK -└── python # Python SDK -``` - -## 2. 
Python环境 - -> 当前SDK仅支持Python 3.5, 3.6, 3.7 - -使用如下命令获取已安装的Python版本号。如果本机的版本不匹配,建议使用[pyenv](https://github.com/pyenv/pyenv)、[anaconda](https://www.anaconda.com/)等Python版本管理工具对SDK所在目录进行配置。 - -```shell -$python3 --version -``` - -接着使用如下命令确认pip的版本是否满足要求,要求pip版本为20.2.2或更高版本。详细的pip安装过程可以参考[官网教程](https://pip.pypa.io/en/stable/installation/)。 - -```shell -$python3 -m pip --version -``` - -## 3. 安装依赖 - -### 3.1 安装paddlepaddle - -根据具体的部署芯片(CPU/GPU)安装对应的PaddlePaddle的whl包。 - -`x86_64 CPU` 平台可以使用如下命令进行安装: - -```shell -python3 -m pip install paddlepaddle==2.2.2 -i https://mirror.baidu.com/pypi/simple -``` - -NVIDIA GPU平台的详细安装教程可以参考[官网Paddle安装教程](https://www.paddlepaddle.org.cn/install/quick?docurl=/documentation/docs/zh/install/pip/linux-pip.html)。 - -> 使用 NVIDIA GPU 预测时,必须满足: -> -> 1. 机器已安装 cuda, cudnn -> 2. 已正确安装对应 cuda 版本的paddle 版本 -> 3. 通过设置环境变量`FLAGS_fraction_of_gpu_memory_to_use`设置合理的初始内存使用比例 - -### 3.2 安装EasyEdge Python Wheel 包 - -在`python`目录下,安装特定Python版本的EasyEdge Wheel包。对`x86_64 CPU` 或 `x86_64 Nvidia GPU平台 `可以使用如下命令进行安装,具体名称以 Python SDK 包中的 whl 为准。 - -```shell -python3 -m pip install -U BaiduAI_EasyEdge_SDK-{SDK版本号}-cp{Python版本号}-cp{Python版本号}m-linux_x86_64.whl -``` - -`armv8 CPU平台`可以使用如下命令进行安装: - -```shell -python3 -m pip install -U BaiduAI_EasyEdge_SDK-{版本号}-cp36-cp36m-linux_aarch64.whl -``` - -# 快速开始 - -## 1. 文件结构说明 - -Python SDK文件结构如下: - -```shell -EasyEdge-Linux-x86--[部署芯片] -├──... -├──python # Linux Python SDK - ├── # 特定Python版本的EasyEdge Wheel包, 二次开发可使用 - ├── BaiduAI_EasyEdge_SDK-1.2.8-cp35-cp35m-linux_x86_64.whl - ├── BaiduAI_EasyEdge_SDK-1.2.8-cp36-cp36m-linux_x86_64.whl - ├── BaiduAI_EasyEdge_SDK-1.2.8-cp37-cp37m-linux_x86_64.whl - ├── infer_demo # demo体验完整文件 - │ ├── demo_xxx.py # 包含前后处理的端到端推理demo文件 - │ └── demo_serving.py # 提供http服务的demo文件 - ├── tensor_demo # tensor in/out demo文件 - │ └── demo_xxx.py -``` - -## 2. 测试Demo - -> 模型资源文件默认已经打包在开发者下载的SDK包中, 默认为`RES`目录。 - -### 2.1 预测图像 - -使用infer_demo文件夹下的demo文件。 - -```bash -python3 demo_x86_cpu.py {模型RES文件夹} {测试图片路径} -``` - -运行效果示例: - -
- -```shell -2022-06-14 14:40:16 INFO [EasyEdge] [demo_nvidia_gpu.py:38] 140518522509120: Init paddlefluid engine... -2022-06-14 14:40:20 INFO [EasyEdge] [demo_nvidia_gpu.py:38] 140518522509120: Paddle version: 2.2.2 -{'confidence': 0.9012349843978882, 'index': 8, 'label': 'n01514859 hen'} -``` - -可以看到,运行结果为`index:8,label:hen`,通过imagenet [类别映射表](https://gist.github.com/yrevar/942d3a0ac09ec9e5eb3a),可以找到对应的类别,即 'hen',由此说明我们的预测结果正确。 - -# 预测API流程详解 - -本章节主要结合前文的Demo示例来介绍推理API,方便开发者学习并将运行库嵌入到开发者的程序当中,更详细的API请参考`infer_demo/demo_xx_xx.py`文件,查看下面的Python代码中的step注释说明。 - -## 1. 基础流程 - -> ❗注意,请优先参考SDK中自带demo的使用流程和说明。遇到错误,请优先参考文件中的注释、解释、日志说明。 - -`infer_demo/demo_xx_xx.py` - -```python -# 引入EasyEdge运行库 -import BaiduAI.EasyEdge as edge - -# 创建并初始化一个预测Progam;选择合适的引擎 -pred = edge.Program() -pred.init(model_dir={RES文件夹路径}, device=edge.Device.CPU, engine=edge.Engine.PADDLE_FLUID) # x86_64 CPU -# pred.init(model_dir=_model_dir, device=edge.Device.GPU, engine=edge.Engine.PADDLE_FLUID) # x86_64 Nvidia GPU -# pred.init(model_dir=_model_dir, device=edge.Device.CPU, engine=edge.Engine.PADDLE_LITE) # armv8 CPU - -# 预测图像 -res = pred.infer_image({numpy.ndarray的图片}) - -# 关闭结束预测Progam -pred.close() -``` - -`infer_demo/demo_serving.py` - -```python -import BaiduAI.EasyEdge as edge -from BaiduAI.EasyEdge.serving import Serving - -# 创建并初始化Http服务 -server = Serving(model_dir={RES文件夹路径}, license=serial_key) - -# 运行Http服务 -# 请参考同级目录下demo_xx_xx.py里: -# pred.init(model_dir=xx, device=xx, engine=xx, device_id=xx) -# 对以下参数device\device_id和engine进行修改 -server.run(host=host, port=port, device=edge.Device.CPU, engine=edge.Engine.PADDLE_FLUID) # x86_64 CPU -# server.run(host=host, port=port, device=edge.Device.GPU, engine=edge.Engine.PADDLE_FLUID) # x86_64 Nvidia GPU -# server.run(host=host, port=port, device=edge.Device.CPU, engine=edge.Engine.PADDLE_LITE) # armv8 CPU -``` - -## 2. 初始化 - -- 接口 - -```python - def init(self, - model_dir, - device=Device.CPU, - engine=Engine.PADDLE_FLUID, - config_file='conf.json', - preprocess_file='preprocess_args.json', - model_file='model', - params_file='params', - label_file='label_list.txt', - infer_cfg_file='infer_cfg.json', - device_id=0, - thread_num=1 - ): - """ - Args: - model_dir: str - device: BaiduAI.EasyEdge.Device,比如:Device.CPU - engine: BaiduAI.EasyEdge.Engine, 比如: Engine.PADDLE_FLUID - config_file: str - preprocess_file: str - model_file: str - params_file: str - label_file: str 标签文件 - infer_cfg_file: 包含预处理、后处理信息的文件 - device_id: int 设备ID - thread_num: int CPU的线程数 - - Raises: - RuntimeError, IOError - Returns: - bool: True if success - """ -``` - -若返回不是True,请查看输出日志排查错误原因。 - -## 3. SDK参数配置 - -使用 CPU 预测时,可以通过在 init 中设置 thread_num 使用多线程预测。如: - -```python -pred.init(model_dir=_model_dir, device=edge.Device.CPU, engine=edge.Engine.PADDLE_FLUID, thread_num=4) -``` - -使用 GPU 预测时,可以通过在 init 中设置 device_id 指定需要的GPU device id。如: - -```python -pred.init(model_dir=_model_dir, device=edge.Device.GPU, engine=edge.Engine.PADDLE_FLUID, device_id=0) -``` - -## 4. 
预测图像 - -- 接口 - -```python - def infer_image(self, img, - threshold=0.3, - channel_order='HWC', - color_format='BGR', - data_type='numpy') - """ - - Args: - img: np.ndarray or bytes - threshold: float - only return result with confidence larger than threshold - channel_order: string - channel order HWC or CHW - color_format: string - color format order RGB or BGR - data_type: string - 仅在图像分割时有意义。 'numpy' or 'string' - 'numpy': 返回已解析的mask - 'string': 返回未解析的mask游程编码 - - Returns: - list - - """ -``` - -| 字段 | 类型 | 取值 | 说明 | -| ---------- | -------------------- | --------- | ------------------------ | -| confidence | float | 0~1 | 分类或检测的置信度 | -| label | string | | 分类或检测的类别 | -| index | number | | 分类或检测的类别 | -| x1, y1 | float | 0~1 | 物体检测,矩形的左上角坐标 (相对长宽的比例值) | -| x2, y2 | float | 0~1 | 物体检测,矩形的右下角坐标(相对长宽的比例值) | -| mask | string/numpy.ndarray | 图像分割的mask | | - -***关于矩形坐标*** - -x1 * 图片宽度 = 检测框的左上角的横坐标 - -y1 * 图片高度 = 检测框的左上角的纵坐标 - -x2 * 图片宽度 = 检测框的右下角的横坐标 - -y2 * 图片高度 = 检测框的右下角的纵坐标 - -可以参考 demo 文件中使用 opencv 绘制矩形的逻辑。 - -***结果示例*** - -i) 图像分类 - -```json -{ - "index": 736, - "label": "table", - "confidence": 0.9 -} -``` - -ii) 物体检测 - -```json -{ - "index": 8, - "label": "cat", - "confidence": 1.0, - "x1": 0.21289, - "y1": 0.12671, - "x2": 0.91504, - "y2": 0.91211, -} -``` - -iii) 图像分割 - -```json -{ - "name": "cat", - "score": 1.0, - "location": { - "left": ..., - "top": ..., - "width": ..., - "height": ..., - }, - "mask": ... -} -``` - -mask字段中,data_type为`numpy`时,返回图像掩码的二维数组 - -```text -{ - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 1, 1, 1, 0, 0, 0, 0}, - {0, 0, 0, 1, 1, 1, 0, 0, 0, 0}, - {0, 0, 0, 1, 1, 1, 0, 0, 0, 0}, - {0, 0, 0, 1, 1, 1, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, -} -其中1代表为目标区域,0代表非目标区域 -``` - -data_type为`string`时,mask的游程编码,解析方式可参考 [demo](https://github.com/Baidu-AIP/EasyDL-Segmentation-Demo)。 - - -# FAQ - -1. 执行infer_demo文件时,提示your generated code is out of date and must be regenerated with protoc >= 3.19.0 - - 进入当前项目,首先卸载protobuf - -```shell -python3 -m pip uninstall protobuf -``` - -       安装低版本protobuf - -```shell -python3 -m pip install protobuf==3.19.0 -``` diff --git a/docs/Linux-Python-SDK-Serving.md b/docs/Linux-Python-SDK-Serving.md deleted file mode 100644 index 56cee55854..0000000000 --- a/docs/Linux-Python-SDK-Serving.md +++ /dev/null @@ -1,268 +0,0 @@ -# 简介 - -本文档以[千分类模型_MobileNetV3](https://ai.baidu.com/easyedge/app/openSource)为例,介绍 FastDeploy中的模型SDK ,在**Intel x86_64 / NVIDIA GPU Linux Python** 环境下: (1)SDK **服务化**推理部署步骤; (2)介绍模型推流全流程API,方便开发者了解项目后二次开发。 -其中Linux C++请参考[Linux C++环境下的服务化推理部署](./Linux-CPP-SDK-Serving.md)文档。 - -**【注意】**:OCR Demo 暂不支持服务化部署。 - - - -* [简介](#简介) - -* [环境准备](#环境准备) - - * [1. SDK下载](#1-sdk下载) - * [2. Python环境](#2-python环境) - * [3. 安装依赖](#3-安装依赖) - * [3.1 安装paddlepaddle](#31-安装paddlepaddle) - * [3.2 安装EasyEdge Python Wheel 包](#32-安装easyedge-python-wheel-包) - -* [快速开始](#快速开始) - - * [1. 文件结构说明](#1-文件结构说明) - * [2. 测试Serving服务](#2-测试serving服务) - * [2.1 启动HTTP预测服务](#21-启动http预测服务) - -* [HTTP API流程详解](#http-api流程详解) - - * [1. 开启http服务](#1-开启http服务) - * [2. 请求http服务](#2-请求http服务) - * [2.1 http 请求方式:不使用图片base64格式](#21-http-请求方式不使用图片base64格式) - * [3. http 返回数据](#3-http-返回数据) - -* [FAQ](#faq) - - - -# 环境准备 - -## 1. SDK下载 - -根据开发者模型、部署芯片、操作系统需要,在图像界面[飞桨开源模型](https://ai.baidu.com/easyedge/app/openSource)或[GIthub](https://github.com/PaddlePaddle/FastDeploy)中选择对应的SDK进行下载。解压后SDK目录结构如下: - -```shell -EasyEdge-Linux-x86-[部署芯片] -├── RES # 模型文件资源文件夹,可替换为其他模型 -├── README.md -├── cpp # C++ SDK -└── python # Python SDK -``` - -## 2. 
Python环境 - -> 当前SDK仅支持Python 3.5, 3.6, 3.7 - -使用如下命令获取已安装的Python版本号。如果本机的版本不匹配,建议使用[pyenv](https://github.com/pyenv/pyenv)、[anaconda](https://www.anaconda.com/)等Python版本管理工具对SDK所在目录进行配置。 - -```shell -$python3 --version -``` - -接着使用如下命令确认pip的版本是否满足要求,要求pip版本为20.2.2或更高版本。详细的pip安装过程可以参考[官网教程](https://pip.pypa.io/en/stable/installation/)。 - -```shell -$python3 -m pip --version -``` - -## 3. 安装依赖 - -### 3.1 安装paddlepaddle - -根据具体的部署芯片(CPU/GPU)安装对应的PaddlePaddle的whl包。 - -1.`x86_64 CPU` 平台可以使用如下命令进行安装: - - ```shell - python3 -m pip install paddlepaddle==2.2.2 -i https://mirror.baidu.com/pypi/simple - ``` - -2.`x86_64 NVIDIA GPU` 支持的CUDA和CUDNN版本与PaddlePaddle框架保持一致,如下: - - * CUDA 工具包10.1/10.2配合cuDNN 7 (cuDNN版本>=7.6.5, 如需多卡支持,需配合NCCL2.7及更高) - * CUDA 工具包11.0配合cuDNN v8.0.4(如需多卡支持,需配合NCCL2.7及更高) - * CUDA 工具包11.1配合cuDNN v8.1.1(如需多卡支持,需配合NCCL2.7及更高) - * CUDA 工具包11.2配合cuDNN v8.1.1(如需多卡支持,需配合NCCL2.7及更高) - - 具体安装命令,参考[官网Paddle安装教程](https://www.paddlepaddle.org.cn/install/quick?docurl=/documentation/docs/zh/install/pip/linux-pip.html)。 - -> 使用 NVIDIA GPU 预测时,必须满足: -> -> 1. 机器已安装 cuda, cudnn -> 2. 已正确安装对应 cuda 版本的paddle 版本 -> 3. 通过设置环境变量`FLAGS_fraction_of_gpu_memory_to_use`设置合理的初始内存使用比例 - -### 3.2 安装EasyEdge Python Wheel 包 - -在`python`目录下,安装特定Python版本的EasyEdge Wheel包。对`x86_64 CPU` 或 `x86_64 Nvidia GPU平台 `可以使用如下命令进行安装,具体名称以 Python SDK 包中的 whl 为准。 - -```shell -python3 -m pip install -U BaiduAI_EasyEdge_SDK-{SDK版本号}-cp{Python版本号}-cp{Python版本号}m-linux_x86_64.whl -``` - -`armv8 CPU平台`可以使用如下命令进行安装: - -```shell -python3 -m pip install -U BaiduAI_EasyEdge_SDK-{SDK版本号}-cp36-cp36m-linux_aarch64.whl -``` - -# 快速开始 - -## 1. 文件结构说明 - -Python SDK文件结构如下: - -```shell -EasyEdge-Linux-x86--[部署芯片] -├──... -├──python # Linux Python SDK - ├── # 特定Python版本的EasyEdge Wheel包, 二次开发可使用 - ├── BaiduAI_EasyEdge_SDK-1.2.8-cp35-cp35m-linux_x86_64.whl - ├── BaiduAI_EasyEdge_SDK-1.2.8-cp36-cp36m-linux_x86_64.whl - ├── BaiduAI_EasyEdge_SDK-1.2.8-cp37-cp37m-linux_x86_64.whl - ├── infer_demo # demo体验完整文件 - │ ├── demo_xxx.py # 包含前后处理的端到端推理demo文件 - │ └── demo_serving.py # 提供http服务的demo文件 - ├── tensor_demo # 学习自定义算法前后处理时使用 - │ └── demo_xxx.py -``` - -## 2. 测试Serving服务 - -> 模型资源文件默认已经打包在开发者下载的SDK包中, 默认为`RES`目录。 - -### 2.1 启动HTTP预测服务 - -指定对应的模型文件夹(默认为`RES`)、设备ip和指定端口号,运行如下命令。 - -```shell -python3 demo_serving.py {模型RES文件夹} {host, default 0.0.0.0} {port, default 24401} -``` - -成功启动后,终端中会显示如下字样。 - -```shell -... -* Running on {host ip}:24401 -``` - -如果是在局域网内的机器上部署,开发者此时可以打开浏览器,输入`http://{host ip}:24401`,选择图片来进行测试,运行效果如下。 - - - -如果是在远程机器上部署,那么可以参考`demo_serving.py`中的 `http_client_test()函数`请求http服务来执行推理。 - -# HTTP API流程详解 - -本章节主要结合前文的Demo示例来对API进行介绍,方便开发者学习并将运行库嵌入到开发者的程序当中,更详细的API请参考对应的Python文件。http服务包含服务端和客户端,Demo中提供了不使用图片base格式的`方式一:浏览器请求的方式`,其他几种方式开发者根据个人需要,选择开发。 - -## 1. 
开启http服务 - -http服务的启动使用`demo_serving.py`文件 - -```python -class Serving(object): - """ SDK local serving """ - - def __init__(self, model_dir, license='', model_filename='model', params_filename='params'): - - self.program = None - self.model_dir = model_dir - self.model_filename = model_filename - self.params_filename = params_filename - self.program_lock = threading.Lock() - self.license_key = license - # 只有ObjectTracking会初始化video_processor - self.video_processor = None - - def run(self, host, port, device, engine=Engine.PADDLE_FLUID, service_id=0, device_id=0, **kwargs): - """ Args: host : str port : str device : BaiduAI.EasyEdge.Device,比如:Device.CPU engine : BaiduAI.EasyEdge.Engine, 比如: Engine.PADDLE_FLUID """ - self.run_serving_with_flask(host, port, device, engine, service_id, device_id, **kwargs) -``` - -## 2. 请求http服务 - -> 开发者可以打开浏览器,`http://{设备ip}:24401`,选择图片来进行测试。 - -### 2.1 http 请求方式:不使用图片base64格式 - -URL中的get参数: - -| 参数 | 说明 | 默认值 | -| --------- | --------- | ---------------- | -| threshold | 阈值过滤, 0~1 | 如不提供,则会使用模型的推荐阈值 | - -HTTP POST Body即为图片的二进制内容。 - -Python请求示例 - -```python -import requests - -with open('./1.jpg', 'rb') as f: - img = f.read() - result = requests.post( - 'http://127.0.0.1:24401/', - params={'threshold': 0.1}, - data=img).json() -``` - -## 3. http 返回数据 - -| 字段 | 类型说明 | 其他 | -| ---------- | ------ | ------------------------------------ | -| error_code | Number | 0为成功,非0参考message获得具体错误信息 | -| results | Array | 内容为具体的识别结果。其中字段的具体含义请参考`预测图像-返回格式`一节 | -| cost_ms | Number | 预测耗时ms,不含网络交互时间 | - -返回示例 - -```json -{ - "cost_ms": 52, - "error_code": 0, - "results": [ - { - "confidence": 0.94482421875, - "index": 1, - "label": "IronMan", - "x1": 0.059185408055782318, - "x2": 0.18795496225357056, - "y1": 0.14762254059314728, - "y2": 0.52510076761245728, - "mask": "...", // 图像分割模型字段 - "trackId": 0, // 目标追踪模型字段 - }, - - ] -} -``` - -***关于矩形坐标*** - -x1 * 图片宽度 = 检测框的左上角的横坐标 - -y1 * 图片高度 = 检测框的左上角的纵坐标 - -x2 * 图片宽度 = 检测框的右下角的横坐标 - -y2 * 图片高度 = 检测框的右下角的纵坐标 - -***关于分割模型*** - -其中,mask为分割模型的游程编码,解析方式可参考 [demo](https://github.com/Baidu-AIP/EasyDL-Segmentation-Demo)。 - -# FAQ - -1. 执行infer_demo文件时,提示your generated code is out of date and must be regenerated with protoc >= 3.19.0 - -进入当前项目,首先卸载protobuf - -```shell -python3 -m pip uninstall protobuf -``` - -安装低版本protobuf - -```shell -python3 -m pip install protobuf==3.19.0 -``` diff --git a/docs/Replace-Model-With-Anther-One.md b/docs/Replace-Model-With-Anther-One.md deleted file mode 100644 index 1e6f7436a1..0000000000 --- a/docs/Replace-Model-With-Anther-One.md +++ /dev/null @@ -1,266 +0,0 @@ - -# 简介 - -本文档介绍如何将FastDeploy的Demo模型,替换成开发者自己训练的AI模型。(**注意**:FastDeploy下载的SDK和Demo仅支持相同算法模型的替换)。本文档要求开发者已经将Demo和SDK运行跑通,如果要了解运行跑通Demo和SDK指导文档,可以参考[SDK使用文档](https://github.com/PaddlePaddle/FastDeploy/blob/develop/README.md#sdk使用) - -* [简介](#0)
-* [模型替换](#1)
- * [1.模型准备](#2)
- * [1.1 Paddle模型](#3)
- * [1.2 OCR模型特殊转换(仅在ARM CPU上需要)](#4)
- * [1.2.1 下载模型转换工具](#5)
-   * [1.2.2 模型转换](#6)
- * [1.3 其他框架模型](#7)
- * [2.模型名修改和label文件准备](#8)
- * [2.1 非OCR模型名修改](#9)
- * [2.2 OCR模型名修改](#10)
- * [2.3 模型label文件](#11)
- * [3.修改配置文件](#12)
-* [测试效果](#13)
-* [完整配置文件说明](#14)
- * [1.配置文件字段含义](#15)
- * [2.预处理顺序](#16)
-* [FAQ](#17)
- -**注意事项:** - -1. PP-PicoDet模型: 在FastDeploy中,支持PP-Picodet模型,是将后处理写到网络里面的方式(即后处理+NMS都在网络结构里面)。Paddle Detection导出静态模型时,有3种方法,选择将后处理和NMS导入到网络里面即可(参考[导出部分](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.4/configs/picodet#%E5%AF%BC%E5%87%BA%E5%8F%8A%E8%BD%AC%E6%8D%A2%E6%A8%A1%E5%9E%8B))。详细网络区别,可以通过netron工具对比。 - -2. PP-Picodet模型:在FastDeploy中,支持PP-Picodet模型,是将前处理写在网络外面的方式。Paddle Detection中的TinyPose算法中,会将PP-PicoDet模型的前处理写入网络中。如果要使用FastDeploy的SDK进行模型替换,需要将前处理写到网络外面。(参考[Detection中的导出命令](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.4/configs/keypoint/tiny_pose#%E5%B0%86%E8%AE%AD%E7%BB%83%E7%9A%84%E6%A8%A1%E5%9E%8B%E5%AE%9E%E7%8E%B0%E7%AB%AF%E4%BE%A7%E9%83%A8%E7%BD%B2),将TestReader.fuse_normalize=False即可)。 - - - -# 模型替换 - -开发者从PaddleDetection、PaddleClas、PaddleOCR、PaddleSeg等飞桨开发套件导出来的对应模型,完成 [1.模型准备](#)、[1.模型名修改和模型label](#)、[3.修改配置文件](#) 3步操作(需要相同算法才可替换),可完成自定义模型的模型文件,运行时指定新的模型文件,即可在自己训练的模型上实现相应的预测推理任务。 - -* Linux下模型资源文件夹路径:`EasyEdge-Linux-**/RES/` 。 -* Windows下模型资源文件夹路径:`EasyEdge-Windows-**/data/model/`。 -* Android下模型资源文件夹路径:`EasyEdge-Android-**/app/src/assets/infer/` 和 ` app/src/assets/demo/conf.json` -* iOS下模型资源文件夹路径:`EasyEdge-iOS-**/RES/easyedge/` - -主要涉及到下面4个模型相关的文件(mode、params、label_list.txt、infer_cfg.json)和一个APP名相关的配置文件(仅Android、iOS、HTTP需要,APP名字,非必需。) - -* ``` - ├── RES、model、infer # 模型资源文件夹,一套模型适配不同硬件、OS和部署方式 - │ ├── conf.json        # Android、iOS系统APP名字需要 - │ ├── model # 模型结构文件 - │ ├── params # 模型参数文件 - │ ├── label_list.txt # 模型标签文件 - │ ├── infer_cfg.json # 模型前后处理等配置文件 - ``` - - > ❗注意:OCR模型在ARM CPU硬件上(包括Android、Linux、iOS 三款操作系统),因为任务的特殊性,替换在 [1.模型准备](#)、[1.模型名修改和模型label](#) 不同于其他任务模型,详细参考下面步骤。 - - - -## 1.模型准备 - - - -### 1.1 Paddle模型 - -* 通过PaddleDetection、PaddleClas、PaddleOCR、PaddleSeg等导出来飞桨模型文件,包括如下文件(可能存在导出时修改了名字的情况,后缀`.pdmodel`为模型网络结构文件,后缀`.pdiparams`为模型权重文件): - -``` -model.pdmodel # 模型网络结构 -model.pdiparams # 模型权重 -model.yml # 模型的配置文件(包括预处理参数、模型定义等) -``` - - - -### 1.2 OCR模型特殊转换(仅在ARM CPU上需要) - -因为推理引擎版本的问题,OCR模型需要在[1.1 Paddle模型](#3)导出`.pdmodel`和`.pdiparams`模型后,多增加一步模型转换的特殊处理,主要执行下面2步: - - - -#### 1.2.1 下载模型转换工具 - -Linux 模型转换工具下载链接:[opt_linux](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.11/opt_linux)
-M1 模型转换工具下载链接:[opt_m1](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.11/opt_m1)
-mac 模型转换工具下载链接:[opt_mac](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.11/opt_mac)
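- -下载后需先为对应平台的转换工具添加可执行权限(示意,以 Linux 为例,opt_m1、opt_mac 同理): - -``` -chmod +x ./opt_linux -```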
- - - -#### 1.2.2 模型转换 - -以下命令以 mac 为例完成模型转换(Linux、M1 环境将 ./opt_mac 替换为对应的 ./opt_linux、./opt_m1 即可,参数相同): - -``` -* 转换 OCR 检测模型,命令: -./opt_mac --model_dir=./ch_PP-OCRv3_det_infer/ --valid_targets=arm --optimize_out_type=naive_buffer --optimize_out=./ocr_det - -* 转换 OCR 识别模型,命令: -./opt_mac --model_dir=./ch_PP-OCRv3_rec_infer/ --valid_targets=arm --optimize_out_type=naive_buffer --optimize_out=./ocr_rec -``` - -产出:转换得到 `ocr_det.nb` 与 `ocr_rec.nb` 两个模型文件,供下文模型名修改步骤使用。 - -
- - - -### 1.3 其他框架模型 - -* 如果开发着是PyTorch、TensorFLow、Caffe、ONNX等其他框架模型,可以参考[X2Paddle](https://github.com/PaddlePaddle/X2Paddle)官网完成模型转换,即可得到对应的`model.pdmodel`和`model.pdiparams`模型文件。 - - - -## 2.模型名修改和label文件准备 - - - -### 2.1 非OCR模型名修改 - -按照下面的规则,修改套件导出来的模型名和标签文件,并替换到模型资源文件中。 - -``` -1. model.pdmodel 修改成 model -2. model.pdiparams 修改成 params -``` - - - -### 2.2 OCR模型名修改 - -``` -1. ocr_det.nb 修改成 model # 将 检测模型 修改名称成 model -2. ocr_rec.nb 修改成 params # 将 识别模型 修改名称成 model -``` - - - -### 2.3 模型label文件 - -同时需要准备模型文件对应的label文件`label_list.txt`。label文件可以参考原Demo中`label_list.txt`的格式准备。 - - - -## 3. 修改模型相关配置文件 - -(1)infer_cfg.json 文件修改 - -所有程序开发者都需要关注该配置文件。开发者在自己数据/任务中训练模型,可能会修改输入图像尺寸、修改阈值等操作,因此需要根据训练情况修改`Res文件夹下的infer_cfg.json`文件中的对应。CV任务涉及到的配置文件修改包括如下字段: - -``` -1. "best_threshold": 0.3, #网络输出的阈值,根据开发者模型实际情况修改 -2. "resize": [512, 512], #[w, h]网络输入图像尺寸,用户根据实际情况修改。 -``` - -(2)conf.json 文件修改 -仅Android、iOS、HTTP服务应用开发者,需要关注该配置文件。开发者根据自己应用程序命名需要,参考已有`conf.json`即可。 - -通常,开发者修改FastDeploy项目中的模型,涉及到主要是这几个配置信息的修改。FastDeploy详细的配置文件介绍参考[完整配置文件说明](#8)。 - - - -# 测试效果 - -将自定义准备的`RES`文件,按照第2、3步完成修改后,参考可以参考[SDK使用文档](https://github.com/PaddlePaddle/FastDeploy/blob/develop/README.md#sdk%E4%BD%BF%E7%94%A8)完成自己模型上的不同预测体验。 - - - -# 完整配置文件说明 - - - -## 1. 配置文件字段含义 - -模型资源文件`infer_cfg.json`涉及到大量不同算法的前后处理等信息,下表是相关的字段介绍,通常开发者如果没有修改算法前出处理,不需要关心这些字段。非标记【必须】的可不填。 - -```json -{ - "version": 1, - "model_info": { - "best_threshold": 0.3, // 默认0.3 - "model_kind": 1, // 【必须】 1-分类,2-检测,6-实例分割,12-追踪,14-语义分割,401-人脸,402-姿态,10001-决策 - }, - "pre_process": { // 【必须】 - // 归一化, 预处理会把图像 (origin_img - mean) * scale - "skip_norm": false, // 默认为false, 如果设置为true,不做mean scale处理 - "mean": [123, 123, 123], // 【必须,一般不需要动】图像均值,已经根据Paddle套件均值做了转换处理,开发者如果没有修改套件参数,可以不用关注。(X-mean)/ scale - "scale": [0.017, 0.017, 0.017], // 【必须,一般不需要动】 - "color_format": "RGB", // BGR 【必须,一般不需要动】 - "channel_order": "CHW", // HWC - // 大小相关 - "resize": [300, 300], // w, h 【必须】 - "rescale_mode": "keep_size", // 默认keep_size, keep_ratio, keep_ratio2, keep_raw_size, warp_affine - "max_size": 1366, // keep_ratio 用。如果没有提供,则用 resize[0] - "target_size": 800, // keep_ratio 用。如果没有提供,则用 resize[1] - "raw_size_range": [100, 10000], // keep_raw_size 用 - "warp_affine_keep_res": // warp_affine模式使用,默认为false - "center_crop_size": [224, 224], // w, h, 如果需要做center_crop,则提供,否则,无需提供该字段 - "padding": false, - "padding_mode": "padding_align32", // 【非必须】默认padding_align32, 其他可指定:padding_fill_size - "padding_fill_size": [416, 416], // 【非必须】仅padding_fill_size模式下需要提供, [fill_size_w, fill_size_h], 这里padding fill对齐paddle detection实现,在bottom和right方向实现补齐 - "padding_fill_value": [114, 114, 114] // 【非必须】仅padding_fill_size模式下需要提供 - // 其他 - "letterbox": true, - }, - "post_process": { - "box_normed": true, // 默认为true, 如果为false 则表示该模型的box坐标输出不是归一化的 - } -} -``` - - - -## 2. 预处理顺序(没有的流程自动略过) - -1. 灰度图 -> rgb图变换 -2. resize 尺寸变换 -3. center_crop -4. rgb/bgr变换 -5. padding_fill_size -6. letterbox(画个厚边框,填上黑色) -7. chw/hwc变换 -8. 归一化:mean, scale -9. padding_align32 - -rescale_mode说明: - -* keep_size: 将图片缩放到resize指定的大小 -* keep_ratio:将图片按比例缩放,长边不超过max_size,短边不超过target_size -* keep_raw_size:保持原图尺寸,但必须在raw_size_range之间 -* warp_affine: 仿射变换,可以设置warp_affine_keep_res指定是否keep_res,在keep_res为false场景下,宽高通过resize字段指定 - - - -# FAQ - -### 1. 如何处理一些 undefined reference / error while loading shared libraries? 
- -> 如:./easyedge_demo: error while loading shared libraries: libeasyedge.so.1: cannot open shared object file: No such file or directory - -遇到该问题时,请找到具体的库的位置,设置LD_LIBRARY_PATH;或者安装缺少的库。 - -> 示例一:libverify.so.1: cannot open shared object file: No such file or directory -> 链接找不到libveirfy.so文件,一般可通过 export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:../../lib 解决(实际冒号后面添加的路径以libverify.so文件所在的路径为准) - -> 示例二:libopencv_videoio.so.4.5: cannot open shared object file: No such file or directory -> 链接找不到libopencv_videoio.so文件,一般可通过 export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:../../thirdparty/opencv/lib 解决(实际冒号后面添加的路径以libopencv_videoio.so所在路径为准) - -> 示例三:GLIBCXX_X.X.X not found -> 链接无法找到glibc版本,请确保系统gcc版本>=SDK的gcc版本。升级gcc/glibc可以百度搜索相关文献。 - -### 2. 使用libcurl请求http服务时,速度明显变慢 - -这是因为libcurl请求continue导致server等待数据的问题,添加空的header即可 - -```bash -headers = curl_slist_append(headers, "Expect:"); -``` - -### 3. 运行二进制时,提示 libverify.so cannot open shared object file - -可能cmake没有正确设置rpath, 可以设置LD_LIBRARY_PATH为sdk的lib文件夹后,再运行: - -```bash -LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../lib ./easyedge_demo -``` - -### 4. 编译时报错:file format not recognized - -可能是因为在复制SDK时文件信息丢失。请将整个压缩包复制到目标设备中,再解压缩、编译 diff --git a/docs/Windows-CPP-SDK-Inference.md b/docs/Windows-CPP-SDK-Inference.md deleted file mode 100644 index 02a5a4b9fc..0000000000 --- a/docs/Windows-CPP-SDK-Inference.md +++ /dev/null @@ -1,389 +0,0 @@ -# 简介 - -本文档以[千分类模型_MobileNetV3](https://ai.baidu.com/easyedge/app/openSource)为例,介绍FastDeploy中的模型SDK ,在**Intel x86_64 / NVIDIA GPU Windows C++** 环境下:(1)SDK 图像和视频推理部署步骤;(2)介绍模型推流全流程API,方便开发者了解项目后二次开发。 -其中Windows Python请参考[Windows Python环境下的推理部署](./Windows-Python-SDK-Inference.md)文档。 - - - -* [简介](#简介) - -* [环境准备](#环境准备) - - * [1. SDK下载](#1-sdk下载) - * [2. CPP环境](#2-cpp环境) - -* [快速开始](#快速开始) - - * [1. 项目结构说明](#1-项目结构说明) - * [2. 测试EasyEdge服务](#2-测试easyedge服务) - * [3. 预测图像](#3-预测图像) - * [4. 预测视频流](#4-预测视频流) - * [5. 编译Demo](#5-编译demo) - -* [预测API流程详解](#预测api流程详解) - - * [1. SDK参数运行配置](#1-sdk参数运行配置) - * [2. 初始化Predictor](#2-初始化predictor) - * [3. 预测推理](#3-预测推理) - * [3.1 预测图像](#31-预测图像) - * [3.2 预测视频](#32-预测视频) - -* [FAQ](#faq) - - - -# 环境准备 - -## 1. SDK下载 - -根据开发者模型、部署芯片、操作系统需要,在图像界面[飞桨开源模型](https://ai.baidu.com/easyedge/app/openSource)或[GIthub](https://github.com/PaddlePaddle/FastDeploy)中选择对应的SDK进行下载。解压缩后的文件结构如`快速开始`中[1项目介绍说明](#1-项目结构说明)介绍。 - -## 2. CPP环境 - -> 建议使用Microsoft Visual Studio 2015及以上版本,获取核心 C 和 C++ 支持,安装时请选择“使用 C++ 的桌面开发”工作负载。 - -# 快速开始 - -## 1. 项目结构说明 - -```shell -EasyEdge-win-xxx - ├── data - │ ├── model # 模型文件资源文件夹,可替换为其他模型 - │ └── config # 配置文件 - ├── bin # demo二进制程序 - │ ├── xxx_image # 预测图像demo - │ ├── xxx_video # 预测视频流demo - │ └── xxx_serving # 启动http预测服务demo - ├── dll # demo二进制程序依赖的动态库 - ├── ... # 二次开发依赖的文件 - ├── python # Python SDK文件 - ├── EasyEdge.exe # EasyEdge服务 - └── README.md # 环境说明 -``` - -## 2. 测试EasyEdge服务 - -> 模型资源文件默认已经打包在开发者下载的SDK包中,请先将zip包整体拷贝到具体运行的设备中,再解压缩使用。 - -SDK下载完成后,双击打开EasyEdge.exe启动推理服务,输入要绑定的Host ip及端口号Port,点击启动服务。 - -
-图片 -
- -服务启动后,打开浏览器输入`http://{Host ip}:{Port}`,添加图片或者视频来进行测试。 - -
- -
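-除了在浏览器页面中手动测试外,也可以用程序直接请求已启动的服务,便于快速验证模型效果。下面是一段Python示意代码(非SDK自带示例,假设服务绑定在本机的24401端口,实际地址以启动服务时输入的Host ip与Port为准;请求协议与本系列HTTP服务文档描述一致:POST Body为图片二进制,threshold为可选的阈值参数):
-
-```python
-import requests
-
-# 读取测试图片的二进制内容,POST Body即为图片本身
-with open('./1.jpg', 'rb') as f:
-    img = f.read()
-
-# threshold为阈值过滤参数(0~1),不传则使用模型推荐阈值
-result = requests.post(
-    'http://127.0.0.1:24401/',
-    params={'threshold': 0.1},
-    data=img).json()
-print(result)
-```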
- -## 3. 预测图像 - -除了通过上述方式外,您还可以使用bin目录下的可执行文件来体验单一的功能。在dll目录下,点击右键,选择"在终端打开",执行如下命令。 - -> 需要将bin目录下的可执行文件移动到dll目录下执行,或者将dll目录添加到系统环境变量中。 - -```bash -.\easyedge_image_inference {模型model文件夹} {测试图片路径} -``` - -运行效果示例: - -
- -```shell -2022-06-20 10:36:57,602 INFO [EasyEdge] 9788 EasyEdge Windows Development Kit 1.5.2(Build CPU.Generic 20220607) Release -e[37m--- Fused 0 subgraphs into layer_norm op.e[0m -2022-06-20 10:36:58,008 INFO [EasyEdge] 9788 Allocate graph success. -Results of image ..\demo.jpg: -8, n01514859 hen, p:0.953429 -save result image to ..\demo.jpg.result-cpp.jpg -Done -``` - -可以看到,运行结果为`index:8,label:hen`,通过imagenet [类别映射表](https://gist.github.com/yrevar/942d3a0ac09ec9e5eb3a),可以找到对应的类别,即 'hen',由此说明我们的预测结果正确。 - -## 4. 预测视频流 - -``` -.\easyedge_video_inference {模型model文件夹} {video_type} {video_src} -``` - -其中video_type支持三种视频流类型,它们分别是:(1)本地视频文件 (2)本地摄像头id(3)网络视频流地址。 - -``` -/** - * @brief 输入源类型 - */ -enum class SourceType { - kVideoFile = 1, // 本地视频文件 - kCameraId = 2, // 摄像头的index - kNetworkStream = 3, // 网络视频流 -}; -``` - -video_src 即为文件路径。 - -## 5. 编译Demo - -在[项目结构说明](#1-项目结构说明)中,`bin`路径下的可执行文件是由`src`下的对应文件编译得到的,具体的编译命令如下。 - -``` -cd src -mkdir build && cd build -cmake .. && make -``` - -编译完成后,在build文件夹下会生成编译好的可执行文件,如图像推理的二进制文件:`build/demo_serving/easyedge_serving`。 - -# 预测API流程详解 - -本章节主要结合前文的Demo示例来介绍推理API,方便开发者学习并将运行库嵌入到开发者的程序当中,更详细的API请参考`include/easyedge/easyedge*.h`文件。图像、视频的推理包含以下3个API,查看下面的cpp代码中的step注释说明。 - -> ❗注意: -> (1)`src`文件夹中包含完整可编译的cmake工程实例,建议开发者先行了解[cmake工程基本知识](https://cmake.org/cmake/help/latest/guide/tutorial/index.html)。 -> (2)请优先参考SDK中自带的Demo工程的使用流程和说明。遇到错误,请优先参考文件中的注释、解释、日志说明。 - -```cpp - // step 1: SDK配置运行参数 - EdgePredictorConfig config; - config.model_dir = {模型文件目录}; - - // step 2: 创建并初始化Predictor;这这里选择合适的引擎 - auto predictor = global_controller()->CreateEdgePredictor(config); - - // step 3-1: 预测图像 - auto img = cv::imread({图片路径}); - std::vector results; - predictor->infer(img, results); - - // step 3-2: 预测视频 - std::vector results; - FrameTensor frame_tensor; - VideoConfig video_config; - video_config.source_type = static_cast(video_type); // source_type 定义参考头文件 easyedge_video.h - video_config.source_value = video_src; - /* - ... more video_configs, 根据需要配置video_config的各选项 - */ - auto video_decoding = CreateVideoDecoding(video_config); - while (video_decoding->next(frame_tensor) == EDGE_OK) { - results.clear(); - if (frame_tensor.is_needed) { - predictor->infer(frame_tensor.frame, results); - render(frame_tensor.frame, results, predictor->model_info().kind); - } - //video_decoding->display(frame_tensor); // 显示当前frame,需在video_config中开启配置 - //video_decoding->save(frame_tensor); // 存储当前frame到视频,需在video_config中开启配置 - } -``` - -若需自定义library search path或者gcc路径,修改对应Demo工程下的CMakeList.txt即可。 - -## 1. SDK参数运行配置 - -SDK的参数通过`EdgePredictorConfig::set_config`和`global_controller()->set_config`配置。本Demo 中设置了模型路径,其他参数保留默认参数。更详细的支持运行参数等,可以参考开发工具包中的头文件(`include/easyedge/easyedge_xxxx_config.h`)的详细说明。 - -配置参数使用方法如下: - -``` -EdgePredictorConfig config; -config.model_dir = {模型文件目录}; -``` - -## 2. 初始化Predictor - -- 接口 - - ```cpp - auto predictor = global_controller()->CreateEdgePredictor(config); - predictor->init(); - ``` - -若返回非0,请查看输出日志排查错误原因。 - -## 3. 
预测推理 - -### 3.1 预测图像 - -> 在Demo中展示了预测接口infer()传入cv::Mat& image图像内容,并将推理结果赋值给std::vector& result。更多关于infer()的使用,可以根据参考`easyedge.h`头文件中的实际情况、参数说明自行传入需要的内容做推理 - -- 接口输入 - -```cpp - /** - * @brief - * 通用接口 - * @param image: must be BGR , HWC format (opencv default) - * @param result - * @return - */ - virtual int infer(cv::Mat& image, std::vector& result) = 0; -``` - -图片的格式务必为opencv默认的BGR, HWC格式。 - -- 接口返回 - - `EdgeResultData`中可以获取对应的分类信息、位置信息。 - -```cpp -struct EdgeResultData { - int index; // 分类结果的index - std::string label; // 分类结果的label - float prob; // 置信度 - - // 物体检测 或 图像分割时使用: - float x1, y1, x2, y2; // (x1, y1): 左上角, (x2, y2): 右下角; 均为0~1的长宽比例值。 - - // 图像分割时使用: - cv::Mat mask; // 0, 1 的mask - std::string mask_rle; // Run Length Encoding,游程编码的mask -}; -``` - -*** 关于矩形坐标 *** - -x1 * 图片宽度 = 检测框的左上角的横坐标 - -y1 * 图片高度 = 检测框的左上角的纵坐标 - -x2 * 图片宽度 = 检测框的右下角的横坐标 - -y2 * 图片高度 = 检测框的右下角的纵坐标 - -*** 关于图像分割mask *** - -``` -cv::Mat mask为图像掩码的二维数组 -{ - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 1, 1, 1, 0, 0, 0, 0}, - {0, 0, 0, 1, 1, 1, 0, 0, 0, 0}, - {0, 0, 0, 1, 1, 1, 0, 0, 0, 0}, - {0, 0, 0, 1, 1, 1, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, -} -其中1代表为目标区域,0代表非目标区域 -``` - -*** 关于图像分割mask_rle *** - -该字段返回了mask的游程编码,解析方式可参考 [http demo](https://github.com/Baidu-AIP/EasyDL-Segmentation-Demo)。 - -以上字段可以参考demo文件中使用opencv绘制的逻辑进行解析。 - -### 3.2 预测视频 - -SDK 提供了支持摄像头读取、视频文件和网络视频流的解析工具类`VideoDecoding`,此类提供了获取视频帧数据的便利函数。通过`VideoConfig`结构体可以控制视频/摄像头的解析策略、抽帧策略、分辨率调整、结果视频存储等功能。对于抽取到的视频帧可以直接作为SDK infer 接口的参数进行预测。 - -- 接口输入 - -class`VideoDecoding`: - -``` - /** - * @brief 获取输入源的下一帧 - * @param frame_tensor - * @return - */ - virtual int next(FrameTensor &frame_tensor) = 0; - - /** - * @brief 显示当前frame_tensor中的视频帧 - * @param frame_tensor - * @return - */ - virtual int display(const FrameTensor &frame_tensor) = 0; - - /** - * @brief 将当前frame_tensor中的视频帧写为本地视频文件 - * @param frame_tensor - * @return - */ - virtual int save(FrameTensor &frame_tensor) = 0; - - /** - * @brief 获取视频的fps属性 - * @return - */ - virtual int get_fps() = 0; - /** - * @brief 获取视频的width属性 - * @return - */ - virtual int get_width() = 0; - - /** - * @brief 获取视频的height属性 - * @return - */ - virtual int get_height() = 0; -``` - -struct `VideoConfig` - -``` -/** - * @brief 视频源、抽帧策略、存储策略的设置选项 - */ -struct VideoConfig { - SourceType source_type; // 输入源类型 - std::string source_value; // 输入源地址,如视频文件路径、摄像头index、网络流地址 - int skip_frames{0}; // 设置跳帧,每隔skip_frames帧抽取一帧,并把该抽取帧的is_needed置为true - int retrieve_all{false}; // 是否抽取所有frame以便于作为显示和存储,对于不满足skip_frames策略的frame,把所抽取帧的is_needed置为false - int input_fps{0}; // 在采取抽帧之前设置视频的fps - Resolution resolution{Resolution::kAuto}; // 采样分辨率,只对camera有效 - - bool enable_display{false}; // 默认不支持。 - std::string window_name{"EasyEdge"}; - bool display_all{false}; // 是否显示所有frame,若为false,仅显示根据skip_frames抽取的frame - - bool enable_save{false}; - std::string save_path; // frame存储为视频文件的路径 - bool save_all{false}; // 是否存储所有frame,若为false,仅存储根据skip_frames抽取的frame - - std::map conf; -}; -``` - -| 序号 | 字段 | 含义 | -| --- | -------------- | ---------------------------------------------------------------------------------------------------------------------------------- | -| 1 | `source_type` | 输入源类型,支持视频文件、摄像头、网络视频流三种,值分别为1、2、3 | -| 2 | `source_value` | 若`source_type`为视频文件,该值为指向视频文件的完整路径;若`source_type`为摄像头,该值为摄像头的index,如对于`/dev/video0`的摄像头,则index为0;若`source_type`为网络视频流,则为该视频流的完整地址。 | -| 3 | `skip_frames` | 设置跳帧,每隔skip_frames帧抽取一帧,并把该抽取帧的is_needed置为true,标记为is_needed的帧是用来做预测的帧。反之,直接跳过该帧,不经过预测。 | -| 4 | `retrieve_all` | 
若置该项为true,则无论是否设置跳帧,所有的帧都会被抽取返回,以作为显示或存储用。 | -| 5 | `input_fps` | 用于抽帧前设置fps | -| 6 | `resolution` | 设置摄像头采样的分辨率,其值请参考`easyedge_video.h`中的定义,注意该分辨率调整仅对输入源为摄像头时有效 | -| 7 | `conf` | 高级选项。部分配置会通过该map来设置 | - -*** 注意:*** - -1. `VideoConfig`不支持`display`功能。如果需要使用`VideoConfig`的`display`功能,需要自行编译带有GTK选项的OpenCV。 - -2. 使用摄像头抽帧时,如果通过`resolution`设置了分辨率调整,但是不起作用,请添加如下选项: - - ``` - video_config.conf["backend"] = "2"; - ``` - -3. 部分设备上的CSI摄像头尚未兼容,如遇到问题,可以通过工单、QQ交流群或微信交流群反馈。 - -具体接口调用流程,可以参考SDK中的`demo_video_inference`。 - -# FAQ - -1. 执行infer_demo文件时,提示your generated code is out of date and must be regenerated with protoc >= 3.19.0 - -进入当前项目,首先卸载protobuf - -```shell -python3 -m pip uninstall protobuf -``` - -安装低版本protobuf - -```shell -python3 -m pip install protobuf==3.19.0 -``` diff --git a/docs/Windows-CPP-SDK-Serving.md b/docs/Windows-CPP-SDK-Serving.md deleted file mode 100644 index e2215466bd..0000000000 --- a/docs/Windows-CPP-SDK-Serving.md +++ /dev/null @@ -1,275 +0,0 @@ -# 简介 - -本文档以[千分类模型_MobileNetV3](https://ai.baidu.com/easyedge/app/openSource)为例,本文档介绍FastDeploy中的模型SDK,在**Intel x86_64 / NVIDIA GPU、Windows操作系统** 的C++环境:(1)HTTP服务化推理部署步骤,(2)介绍推理全流程API,方便开发者了解项目后二次开发。 -如果开发者对Python语言的相关能力感兴趣,可以参考Windows Python请参考[Windows Python环境下的推理部署](./Windows-Python-SDK-Serving.md)文档。 - - - -* [简介](#简介) - -* [环境准备](#环境准备) - - * [1. SDK下载](#1-sdk下载) - * [2. CPP环境](#2-cpp环境) - -* [快速开始](#快速开始) - - * [1. 项目结构说明](#1-项目结构说明) - * [2. 测试EasyEdge服务](#2-测试easyedge服务) - * [3. 启动HTTP预测服务](#3-启动http预测服务) - * [4. 编译Demo](#4-编译demo) - -* [HTTP API流程详解](#http-api流程详解) - - * [1. 开启http服务](#1-开启http服务) - * [2. 请求http服务](#2-请求http服务) - * [2.1 http 请求方式一:不使用图片base64格式](#21-http-请求方式一不使用图片base64格式) - * [2.2 http 请求方法二:使用图片base64格式](#22-http-请求方法二使用图片base64格式) - * [3. http 返回数据](#3-http-返回数据) - -* [FAQ](#faq) - - - -# 环境准备 - -## 1. SDK下载 - -根据开发者模型、部署芯片、操作系统需要,在图像界面[飞桨开源模型](https://ai.baidu.com/easyedge/app/openSource)或[GIthub](https://github.com/PaddlePaddle/FastDeploy)中选择对应的SDK进行下载。解压缩后的文件结构如`快速开始`中[1项目介绍说明](#1-%E9%A1%B9%E7%9B%AE%E7%BB%93%E6%9E%84%E8%AF%B4%E6%98%8E)介绍。 - -```shell - -``` - -## 2. CPP环境 - -> 建议使用Microsoft Visual Studio 2015及以上版本,获取核心 C 和 C++ 支持,安装时请选择“使用 C++ 的桌面开发”工作负载。 - -# 快速开始 - -## 1. 项目结构说明 - -```shell -EasyEdge-win-xxx - ├── data - │ ├── model # 模型文件资源文件夹,可替换为其他模型 - │ └── config # 配置文件 - ├── bin # demo二进制程序 - │ ├── xxx_image # 预测图像demo - │ ├── xxx_video # 预测视频流demo - │ └── xxx_serving # 启动http预测服务demo - ├── dll # demo二进制程序依赖的动态库 - ├── ... # 二次开发依赖的文件 - ├── python # Python SDK文件 - ├── EasyEdge.exe # EasyEdge服务 - └── README.md # 环境说明 -``` - -## 2. 测试EasyEdge服务 - -> 模型资源文件默认已经打包在开发者下载的SDK包中,请先将zip包整体拷贝到具体运行的设备中,再解压缩使用。 - -SDK下载完成后,双击打开EasyEdge.exe启动推理服务,输入要绑定的Host ip及端口号Port,点击启动服务。 - -
-图片 -
- -服务启动后,打开浏览器输入`http://{Host ip}:{Port}`,添加图片或者视频来进行测试。 - -
- -
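-服务启动后,除了在浏览器中测试,也可以用一小段Python代码快速确认服务是否就绪(示意代码,假设服务监听本机24401端口;其中`/get/service_id`接口来自下文`start_http_server`的参数说明,会以text/plain返回service_id):
-
-```python
-import requests
-
-# 请求service_id接口,返回200及service_id文本即说明HTTP服务已正常响应
-resp = requests.get('http://127.0.0.1:24401/get/service_id')
-print(resp.status_code, resp.text)
-```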
-## 3. 启动HTTP预测服务 - -除了通过上述方式外,您还可以使用bin目录下的可执行文件来体验单一的功能。在dll目录下,点击右键,选择"在终端打开",执行如下命令。 - -> 需要将bin目录下的可执行文件移动到dll目录下执行,或者将dll目录添加到系统环境变量中。 - -``` -.\easyedge_serving {模型model文件夹路径} -``` - -启动后,日志中会显示如下字样。 - -``` -HTTP is now serving at 0.0.0.0:24401 -``` - -此时,开发者可以打开浏览器,`http://127.0.0.1:24401`,执行和之前一样的操作即可。 - -![](https://user-images.githubusercontent.com/54695910/175854073-fb8189e5-0ffb-472c-a17d-0f35aa6a8418.png) - -## 4. 编译Demo - -在[项目结构说明](#1项目结构说明)中,`bin`路径下的可执行文件是由`src`下的对应文件编译得到的,具体的编译命令如下。 - -``` -cd src -mkdir build && cd build -cmake .. && make -``` - -编译完成后,在build文件夹下会生成编译好的可执行文件,如图像推理的二进制文件:`build/demo_serving/easyedge_serving`。 - -# HTTP API流程详解 - -本章节主要结合[2.1 HTTP Demo](#4)的API介绍,方便开发者学习并将运行库嵌入到开发者的程序当中,更详细的API请参考`include/easyedge/easyedge*.h`文件。http服务包含服务端和客户端,目前支持的能力包括以下几种方式,Demo中提供了不使用图片base格式的`方式一:浏览器请求的方式`,其他几种方式开发者根据个人需要,选择开发。 - -## 1. 开启http服务 - -http服务的启动可直接使用`bin/easyedge_serving`,或参考`src/demo_serving.cpp`文件修改相关逻辑 - -```cpp - /** - * @brief 开启一个简单的demo http服务。 - * 该方法会block直到收到sigint/sigterm。 - * http服务里,图片的解码运行在cpu之上,可能会降低推理速度。 - * @tparam ConfigT - * @param config - * @param host - * @param port - * @param service_id service_id user parameter, uri '/get/service_id' will respond this value with 'text/plain' - * @param instance_num 实例数量,根据内存/显存/时延要求调整 - * @return - */ - template - int start_http_server( - const ConfigT &config, - const std::string &host, - int port, - const std::string &service_id, - int instance_num = 1); -``` - -## 2. 请求http服务 - -> 开发者可以打开浏览器,`http://{设备ip}:24401`,选择图片来进行测试。 - -### 2.1 http 请求方式一:不使用图片base64格式 - -URL中的get参数: - -| 参数 | 说明 | 默认值 | -| --------- | --------- | ---------------- | -| threshold | 阈值过滤, 0~1 | 如不提供,则会使用模型的推荐阈值 | - -HTTP POST Body即为图片的二进制内容(无需base64, 无需json) - -Python请求示例 - -```Python -import requests - -with open('./1.jpg', 'rb') as f: - img = f.read() - result = requests.post( - 'http://127.0.0.1:24401/', - params={'threshold': 0.1}, - data=img).json() -``` - -### 2.2 http 请求方法二:使用图片base64格式 - -HTTP方法:POST -Header如下: - -| 参数 | 值 | -| ------------ | ---------------- | -| Content-Type | application/json | - -**Body请求填写**: - -- 分类网络: - body 中请求示例 - - ``` - { - "image": "" - "top_num": 5 - } - ``` - - body中参数详情 - -| 参数 | 是否必选 | 类型 | 可选值范围 | 说明 | -| ------- | ---- | ------ | ----- | ----------------------------------------------------------------------------------- | -| image | 是 | string | - | 图像数据,base64编码,要求base64图片编码后大小不超过4M,最短边至少15px,最长边最大4096px,支持jpg/png/bmp格式 **注意去掉头部** | -| top_num | 否 | number | - | 返回分类数量,不填该参数,则默认返回全部分类结果 | - -- 检测和分割网络: - Body请求示例: - - ``` - { - "image": "" - } - ``` - - body中参数详情: - -| 参数 | 是否必选 | 类型 | 可选值范围 | 说明 | -| --------- | ---- | ------ | ----- | ----------------------------------------------------------------------------------- | -| image | 是 | string | - | 图像数据,base64编码,要求base64图片编码后大小不超过4M,最短边至少15px,最长边最大4096px,支持jpg/png/bmp格式 **注意去掉头部** | -| threshold | 否 | number | - | 默认为推荐阈值,也可自行根据需要进行设置 | - -## 3. 
http 返回数据 - -| 字段 | 类型说明 | 其他 | -| ---------- | ------ | ------------------------------------ | -| error_code | Number | 0为成功,非0参考message获得具体错误信息 | -| results | Array | 内容为具体的识别结果。其中字段的具体含义请参考`预测图像-返回格式`一节 | -| cost_ms | Number | 预测耗时ms,不含网络交互时间 | - -返回示例 - -```json -{ - "cost_ms": 52, - "error_code": 0, - "results": [ - { - "confidence": 0.94482421875, - "index": 1, - "label": "IronMan", - "x1": 0.059185408055782318, - "x2": 0.18795496225357056, - "y1": 0.14762254059314728, - "y2": 0.52510076761245728, - "mask": "...", // 图像分割模型字段 - "trackId": 0, // 目标追踪模型字段 - }, - - ] -} -``` - -*** 关于矩形坐标 *** - -x1 * 图片宽度 = 检测框的左上角的横坐标 - -y1 * 图片高度 = 检测框的左上角的纵坐标 - -x2 * 图片宽度 = 检测框的右下角的横坐标 - -y2 * 图片高度 = 检测框的右下角的纵坐标 - -*** 关于分割模型 *** - -其中,mask为分割模型的游程编码,解析方式可参考 [http demo](https://github.com/Baidu-AIP/EasyDL-Segmentation-Demo)。 - -# FAQ - -1. 执行infer_demo文件时,提示your generated code is out of date and must be regenerated with protoc >= 3.19.0 - -进入当前项目,首先卸载protobuf - -```shell -python3 -m pip uninstall protobuf -``` - -安装低版本protobuf - -```shell -python3 -m pip install protobuf==3.19.0 -``` diff --git a/docs/Windows-Python-SDK-Inference.md b/docs/Windows-Python-SDK-Inference.md deleted file mode 100644 index 877488c22e..0000000000 --- a/docs/Windows-Python-SDK-Inference.md +++ /dev/null @@ -1,381 +0,0 @@ -# 简介 - -本文档以[千分类模型_MobileNetV3](https://ai.baidu.com/easyedge/app/openSource)为例,介绍 FastDeploy中的模型SDK ,在**Intel x86_64 / NVIDIA GPU Windows Python** 环境下: (1)图像推理部署步骤; (2)介绍模型推流全流程API,方便开发者了解项目后二次开发。 -其中Windows Python请参考[Windows C++环境下的推理部署](./Windows-CPP-SDK-Inference.md)文档。 - - - -* [简介](#简介) - -* [环境准备](#环境准备) - - * [1. SDK下载](#1-sdk下载) - * [2. Python环境](#2-python环境) - * [3. 安装依赖](#3-安装依赖) - * [3.1 安装paddlepaddle](#31-安装paddlepaddle) - * [3.2 安装EasyEdge Python Wheel 包](#32-安装easyedge-python-wheel-包) - -* [快速开始](#快速开始) - - * [1. 文件结构说明](#1-文件结构说明) - * [2. 测试Demo](#2-测试demo) - * [2.1 预测图像](#21-预测图像) - -* [预测API流程详解](#预测api流程详解) - - * [1. 基础流程](#1-基础流程) - * [2. 初始化](#2-初始化) - * [3. SDK参数配置](#3-sdk参数配置) - * [4. 预测图像](#4-预测图像) - -* [FAQ](#faq) - - - -# 环境准备 - -## 1. SDK下载 - -根据开发者模型、部署芯片、操作系统需要,在图像界面[飞桨开源模型](https://ai.baidu.com/easyedge/app/openSource)或[GIthub](https://github.com/PaddlePaddle/FastDeploy)中选择对应的SDK进行下载。解压缩后的文件结构如下所示: - -```shell -EasyEdge-win-[部署芯片] -├── data # 模型文件资源文件夹,可替换为其他模型 -├── ... # C++/C# 相关文件 -├── python # Python SDK文件 -├── EasyEdge.exe # 主程序 -└── README.md # 环境说明 -``` - - - -## 2. Python环境 - -> 当前SDK仅支持Python 3.7 - -打开命令行工具,使用如下命令获取已安装的Python版本号。如果还没有安装Python环境,可以前往[官网](https://www.python.org/)下载Python 3.7对应的安装程序,特别要注意勾上`Add Python 3.7 to PATH`,然后点“Install Now”即可完成安装。 - -```shell -python --version -``` - -如果本机的版本不匹配,建议使用[pyenv](https://github.com/pyenv/pyenv)、[anaconda](https://www.anaconda.com/)等Python版本管理工具对Python SDK所在目录进行配置。 - -接着使用如下命令确认pip的版本是否满足要求,要求pip版本为20.2.2或更高版本。详细的pip安装过程可以参考[官网教程](https://pip.pypa.io/en/stable/installation/)。 - -```shell -python -m pip --version -``` - -## 3. 安装依赖 - -### 3.1 安装paddlepaddle - -根据具体的部署芯片(CPU/GPU)安装对应的PaddlePaddle的whl包。`x86_64 CPU` 平台可以使用如下命令进行安装: - -```shell -python -m pip install paddlepaddle==2.2.2 -i https://mirror.baidu.com/pypi/simple -``` - -`NVIDIA GPU平台`的详细安装教程可以参考[官网Paddle安装教程](https://www.paddlepaddle.org.cn/install/quick?docurl=/documentation/docs/zh/install/pip/linux-pip.html)。 - -> 使用 NVIDIA GPU 预测时,必须满足: -> -> 1. 机器已安装 cuda, cudnn -> -> 2. 已正确安装对应 cuda 版本的paddle 版本 -> 3. 
通过设置环境变量`FLAGS_fraction_of_gpu_memory_to_use`设置合理的初始内存使用比例 - - - -### 3.2 安装EasyEdge Python Wheel 包 - -在`python`目录下,安装Python3.7版本对应的EasyEdge Wheel包。对`x86_64 CPU` 或 `x86_64 Nvidia GPU平台 `可以使用如下命令进行安装,具体名称以 Python SDK 包中的 whl 为准。 - -```shell -python -m pip install -U BaiduAI_EasyEdge_SDK-{SDK版本号}-cp37-cp37m-win_amd64.whl -``` - - - -# 快速开始 - - - -## 1. 文件结构说明 - -Python SDK文件结构如下: - -```shell -EasyEdge-win-[部署芯片] -├── data # 模型文件资源文件夹,可替换为其他模型 -│ ├── model # 模型文件资源文件夹,可替换为其他模型 -│ └── config # 配置文件 -├── ... # C++/C# 相关文件 -├── python # Python SDK文件 -│ ├── # 特定Python 3.7版本的EasyEdge Wheel包, 二次开发可使用 -│ ├── BaiduAI_EasyEdge_SDK-${SDK版本号}-cp37-cp37m-win_amd64.whl -│ ├── requirements.txt # -│ ├── infer_demo # demo体验完整文件 -│ │ ├── demo_xxx.py # 包含前后处理的端到端推理demo文件 -│ │ └── demo_serving.py # 提供http服务的demo文件 -│ └── tensor_demo # tensor in/out demo文件 -``` - - - -## 2. 测试Demo - - - -### 2.1 预测图像 - -根据部署平台,使用infer_demo文件夹下的demo文件,执行如下命令。 - -```shell -python demo_x86_cpu.py {模型model文件夹} {测试图片路径} -``` - -运行效果示例: - -
- -```shell -2022-06-14 18:35:44 DEBUG [EasyEdge] [demo_x86_cpu.py:41] 19424: Config:: w: 256, h: 256; mean: [123.675, 116.28, 103.53]; scale: [0.01712475 0.017507 0.01742919] -2022-06-14 18:35:44 INFO [EasyEdge] [demo_x86_cpu.py:41] 19424: Init paddlefluid engine... -2022-06-14 18:35:45 INFO [EasyEdge] [demo_x86_cpu.py:41] 19424: Paddle version: 2.2.2 -2022-06-14 18:35:45 DEBUG [EasyEdge] [demo_x86_cpu.py:41] 19424: CPU thread num set to 1 -2022-06-14 18:35:45 DEBUG [EasyEdge] [demo_x86_cpu.py:55] 19424: resize to w257, h256 -2022-06-14 18:35:45 DEBUG [EasyEdge] [demo_x86_cpu.py:55] 19424: Switch to CHW -2022-06-14 18:35:45 DEBUG [EasyEdge] [demo_x86_cpu.py:55] 19424: Infer cost: 70.1(66.1) ms -{'confidence': 0.9012351036071777, 'index': 8, 'label': 'n01514859 hen'} -``` - -可以看到,运行结果为`index:8,label:hen`,通过imagenet [类别映射表](https://gist.github.com/yrevar/942d3a0ac09ec9e5eb3a),可以找到对应的类别,即 'hen',由此说明我们的预测结果正确。 - -# 预测API流程详解 - -本章节主要结合前文的Demo示例来介绍推理API,方便开发者学习并将运行库嵌入到开发者的程序当中,更详细的API请参考`infer_demo/demo_xx_xx.py`文件,查看下面的Python代码中的step注释说明。 - -## 1. 基础流程 - -> ❗注意,请优先参考SDK中自带demo的使用流程和说明。遇到错误,请优先参考文件中的注释、解释、日志说明。 - -`infer_demo/demo_xx_xx.py` - -```python -# 引入EasyEdge运行库 -import BaiduAI.EasyEdge as edge - -# 创建并初始化一个预测Progam;选择合适的引擎 -pred = edge.Program() -pred.init(model_dir={RES文件夹路径}, device=edge.Device.CPU, engine=edge.Engine.PADDLE_FLUID) # x86_64 CPU -# pred.init(model_dir=_model_dir, device=edge.Device.GPU, engine=edge.Engine.PADDLE_FLUID) # x86_64 Nvidia GPU -# pred.init(model_dir=_model_dir, device=edge.Device.CPU, engine=edge.Engine.PADDLE_LITE) # armv8 CPU - -# 预测图像 -res = pred.infer_image({numpy.ndarray的图片}) - -# 关闭结束预测Progam -pred.close() -``` - -`infer_demo/demo_serving.py` - -```python -import BaiduAI.EasyEdge as edge -from BaiduAI.EasyEdge.serving import Serving - -# 创建并初始化Http服务 -server = Serving(model_dir={RES文件夹路径}, license=serial_key) - -# 运行Http服务 -# 请参考同级目录下demo_xx_xx.py里: -# pred.init(model_dir=xx, device=xx, engine=xx, device_id=xx) -# 对以下参数device\device_id和engine进行修改 -server.run(host=host, port=port, device=edge.Device.CPU, engine=edge.Engine.PADDLE_FLUID) # x86_64 CPU -# server.run(host=host, port=port, device=edge.Device.GPU, engine=edge.Engine.PADDLE_FLUID) # x86_64 Nvidia GPU -# server.run(host=host, port=port, device=edge.Device.CPU, engine=edge.Engine.PADDLE_LITE) # armv8 CPU -``` - -## 2. 初始化 - -- 接口 - -```python - def init(self, - model_dir, - device=Device.CPU, - engine=Engine.PADDLE_FLUID, - config_file='conf.json', - preprocess_file='preprocess_args.json', - model_file='model', - params_file='params', - label_file='label_list.txt', - infer_cfg_file='infer_cfg.json', - device_id=0, - thread_num=1 - ): - """ - Args: - model_dir: str - device: BaiduAI.EasyEdge.Device,比如:Device.CPU - engine: BaiduAI.EasyEdge.Engine, 比如: Engine.PADDLE_FLUID - config_file: str - preprocess_file: str - model_file: str - params_file: str - label_file: str 标签文件 - infer_cfg_file: 包含预处理、后处理信息的文件 - device_id: int 设备ID - thread_num: int CPU的线程数 - - Raises: - RuntimeError, IOError - Returns: - bool: True if success - """ -``` - -若返回不是True,请查看输出日志排查错误原因。 - -## 3. SDK参数配置 - -使用 CPU 预测时,可以通过在 init 中设置 thread_num 使用多线程预测。如: - -```python -pred.init(model_dir=_model_dir, device=edge.Device.CPU, engine=edge.Engine.PADDLE_FLUID, thread_num=4) -``` - -使用 GPU 预测时,可以通过在 init 中设置 device_id 指定需要的GPU device id。如: - -```python -pred.init(model_dir=_model_dir, device=edge.Device.GPU, engine=edge.Engine.PADDLE_FLUID, device_id=0) -``` - -## 4. 
预测图像 - -- 接口 - -```python - def infer_image(self, img, - threshold=0.3, - channel_order='HWC', - color_format='BGR', - data_type='numpy') - """ - - Args: - img: np.ndarray or bytes - threshold: float - only return result with confidence larger than threshold - channel_order: string - channel order HWC or CHW - color_format: string - color format order RGB or BGR - data_type: string - 仅在图像分割时有意义。 'numpy' or 'string' - 'numpy': 返回已解析的mask - 'string': 返回未解析的mask游程编码 - - Returns: - list - - """ -``` - -| 字段 | 类型 | 取值 | 说明 | -| ---------- | -------------------- | --------- | ------------------------ | -| confidence | float | 0~1 | 分类或检测的置信度 | -| label | string | | 分类或检测的类别 | -| index | number | | 分类或检测的类别 | -| x1, y1 | float | 0~1 | 物体检测,矩形的左上角坐标 (相对长宽的比例值) | -| x2, y2 | float | 0~1 | 物体检测,矩形的右下角坐标(相对长宽的比例值) | -| mask | string/numpy.ndarray | 图像分割的mask | | - -***关于矩形坐标*** - -x1 * 图片宽度 = 检测框的左上角的横坐标 - -y1 * 图片高度 = 检测框的左上角的纵坐标 - -x2 * 图片宽度 = 检测框的右下角的横坐标 - -y2 * 图片高度 = 检测框的右下角的纵坐标 - -可以参考 demo 文件中使用 opencv 绘制矩形的逻辑。 - -***结果示例*** - -i) 图像分类 - -```json -{ - "index": 736, - "label": "table", - "confidence": 0.9 -} -``` - -ii) 物体检测 - -```json -{ - "index": 8, - "label": "cat", - "confidence": 1.0, - "x1": 0.21289, - "y1": 0.12671, - "x2": 0.91504, - "y2": 0.91211, -} -``` - -iii) 图像分割 - -```json -{ - "name": "cat", - "score": 1.0, - "location": { - "left": ..., - "top": ..., - "width": ..., - "height": ..., - }, - "mask": ... -} -``` - -mask字段中,data_type为`numpy`时,返回图像掩码的二维数组 - -```text -{ - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 1, 1, 1, 0, 0, 0, 0}, - {0, 0, 0, 1, 1, 1, 0, 0, 0, 0}, - {0, 0, 0, 1, 1, 1, 0, 0, 0, 0}, - {0, 0, 0, 1, 1, 1, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, -} -其中1代表为目标区域,0代表非目标区域 -``` - -data_type为`string`时,mask的游程编码,解析方式可参考 [demo](https://github.com/Baidu-AIP/EasyDL-Segmentation-Demo)。 - - -# FAQ - -1. 执行infer_demo文件时,提示your generated code is out of date and must be regenerated with protoc >= 3.19.0 - -进入当前项目,首先卸载protobuf - -```shell -python3 -m pip uninstall protobuf -``` - -安装低版本protobuf - -```shell -python3 -m pip install protobuf==3.19.0 -``` diff --git a/docs/Windows-Python-SDK-Serving.md b/docs/Windows-Python-SDK-Serving.md deleted file mode 100644 index 36999b45a1..0000000000 --- a/docs/Windows-Python-SDK-Serving.md +++ /dev/null @@ -1,262 +0,0 @@ -# 简介 - -本文档以[千分类模型_MobileNetV3](https://ai.baidu.com/easyedge/app/openSource)为例,介绍FastDeploy中的模型SDK,在**Intel x86_64 /NVIDIA GPU、Windows操作系统** 的Python环境:(1)HTTP服务化推理部署步骤,(2)介绍推理全流程API,方便开发者了解项目后二次开发。 -如果开发者对C++语言的相关能力感兴趣,可以参考Windows C++请参考[Windows C++环境下的推理部署](./Windows-CPP-SDK-Serving.md)文档。 - - - -* [简介](#简介) - -* [环境准备](#环境准备) - - * [1. SDK下载](#1-sdk下载) - * [2. Python环境](#2-python环境) - * [3. 安装依赖](#3-安装依赖) - * [3.1 安装paddlepaddle](#31-安装paddlepaddle) - * [3.2 安装EasyEdge Python Wheel 包](#32-安装easyedge-python-wheel-包) - -* [快速开始](#快速开始) - - * [1. 文件结构说明](#1-文件结构说明) - * [2. 测试Demo](#2-测试demo) - * [2.1 启动HTTP预测服务](#21-启动http预测服务) - -* [HTTP API流程详解](#http-api流程详解) - - * [1. 开启http服务](#1-开启http服务) - - * [2. 请求http服务](#2-请求http服务) - - * [2.1 http 请求方式:不使用图片base64格式](#21-http-请求方式不使用图片base64格式) - - * [3. http返回数据](#3-http返回数据) - - - -# 环境准备 - -## 1. SDK下载 - -根据开发者模型、部署芯片、操作系统需要,在图像界面[飞桨开源模型](https://ai.baidu.com/easyedge/app/openSource)或[GIthub](https://github.com/PaddlePaddle/FastDeploy)中选择对应的SDK进行下载。解压缩后的文件结构如下所示: - -```shell -EasyEdge-win-[部署芯片] -├── data # 模型文件资源文件夹,可替换为其他模型 -├── ... # C++/C# 相关文件 -├── python # Python SDK文件 -├── EasyEdge.exe # 主程序 -└── README.md # 环境说明 -``` - -## 2. 
Python环境 - -> 当前SDK仅支持Python 3.7 - -打开命令行工具,使用如下命令获取已安装的Python版本号。如果还没有安装Python环境,可以前往[官网](https://www.python.org/)下载Python 3.7对应的安装程序,特别要注意勾上`Add Python 3.7 to PATH`,然后点“Install Now”即可完成安装。 - -```shell -python --version -``` - -如果本机的版本不匹配,建议使用[pyenv](https://github.com/pyenv/pyenv)、[anaconda](https://www.anaconda.com/)等Python版本管理工具对Python SDK所在目录进行配置。 - -接着使用如下命令确认pip的版本是否满足要求,要求pip版本为20.2.2或更高版本。详细的pip安装过程可以参考[官网教程](https://pip.pypa.io/en/stable/installation/)。 - -```shell -python -m pip --version -``` - -## 3. 安装依赖 - -### 3.1 安装paddlepaddle - -根据具体的部署芯片(CPU/GPU)安装对应的PaddlePaddle的whl包。`x86_64 CPU` 平台可以使用如下命令进行安装: - -```shell -python -m pip install paddlepaddle==2.2.2 -i https://mirror.baidu.com/pypi/simple -``` - -`NVIDIA GPU平台`的详细安装教程可以参考[官网Paddle安装教程](https://www.paddlepaddle.org.cn/install/quick?docurl=/documentation/docs/zh/install/pip/linux-pip.html)。 - -> 使用 NVIDIA GPU 预测时,必须满足: -> -> 1. 机器已安装 cuda, cudnn -> -> 2. 已正确安装对应 cuda 版本的paddle 版本 -> 3. 通过设置环境变量`FLAGS_fraction_of_gpu_memory_to_use`设置合理的初始内存使用比例 - -### 3.2 安装EasyEdge Python Wheel 包 - -在`python`目录下,安装Python3.7版本对应的EasyEdge Wheel包。对`x86_64 CPU` 或 `x86_64 Nvidia GPU平台 `可以使用如下命令进行安装,具体名称以 Python SDK 包中的 whl 为准。 - -```shell -python -m pip install -U BaiduAI_EasyEdge_SDK-{SDK版本号}-cp37-cp37m-win_amd64.whl -``` - -# 快速开始 - -## 1. 文件结构说明 - -Python SDK文件结构如下: - -```shell -EasyEdge-win-[部署芯片] -├── data # 模型文件资源文件夹,可替换为其他模型 -│ ├── model # 模型文件资源文件夹,可替换为其他模型 -│ └── config # 配置文件 -├── ... # C++/C# 相关文件 -├── python # Python SDK文件 -│ ├── # 特定Python 3.7版本的EasyEdge Wheel包, 二次开发可使用 -│ ├── BaiduAI_EasyEdge_SDK-${SDK版本号}-cp37-cp37m-win_amd64.whl -│ ├── requirements.txt # -│ ├── infer_demo # demo体验完整文件 -│ │ ├── demo_xxx.py # 包含前后处理的端到端推理demo文件 -│ │ └── demo_serving.py # 提供http服务的demo文件 -│ └── tensor_demo # tensor in/out demo文件 -``` - -## 2. 测试Demo - -### 2.1 启动HTTP预测服务 - -```shell -python demo_serving.py {模型model文件夹} {host, default 0.0.0.0} {port, default 24401} -``` - -成功启动后,终端中会显示如下字样。 - -```shell -2022-06-14 18:45:15 INFO [EasyEdge] [demo_serving.py:50] 21212: Init paddlefluid engine... -2022-06-14 18:45:16 INFO [EasyEdge] [demo_serving.py:50] 21212: Paddle version: 2.2.2 - * Serving Flask app 'Serving' (lazy loading) - * Environment: production - WARNING: This is a development server. Do not use it in a production deployment. - Use a production WSGI server instead. - * Debug mode: off - * Running on all addresses (0.0.0.0) - WARNING: This is a development server. Do not use it in a production deployment. - * Running on http://127.0.0.1:24401 - * Running on http://192.168.3.17:24401 (Press CTRL+C to quit) -``` - -开发者此时可以打开浏览器,输入`http://{host ip}:24401`,选择图片或者视频来进行测试,运行效果如下。 - -
- -
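-如果希望在代码中处理服务的返回结果,可以参考下面的Python示意代码(非SDK自带示例,假设服务监听本机24401端口;各返回字段的含义见下文`3. http返回数据`一节,其中x1/y1/x2/y2为相对图片宽高的比例值):
-
-```python
-import requests
-
-with open('./1.jpg', 'rb') as f:
-    img = f.read()
-
-resp = requests.post('http://127.0.0.1:24401/',
-                     params={'threshold': 0.1}, data=img).json()
-
-if resp['error_code'] != 0:
-    # 非0时可参考返回中的message字段获取具体错误信息
-    print('预测失败:', resp)
-else:
-    for r in resp['results']:
-        # 比例坐标乘以原图宽高即可得到像素坐标
-        print(r['label'], r['confidence'],
-              r.get('x1'), r.get('y1'), r.get('x2'), r.get('y2'))
-```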
- -# HTTP API流程详解 - -本章节主要结合前文的Demo示例来对API进行介绍,方便开发者学习并将运行库嵌入到开发者的程序当中,更详细的API请参考对应的Python文件。http服务包含服务端和客户端,Demo中提供了不使用图片base格式的`方式一:浏览器请求的方式`,其他几种方式开发者根据个人需要,选择开发。 - -## 1. 开启http服务 - -http服务的启动使用`demo_serving.py`文件 - -```python -class Serving(object): - """ SDK local serving """ - - def __init__(self, model_dir, license='', model_filename='model', params_filename='params'): - - self.program = None - self.model_dir = model_dir - self.model_filename = model_filename - self.params_filename = params_filename - self.program_lock = threading.Lock() - self.license_key = license - # 只有ObjectTracking会初始化video_processor - self.video_processor = None - - def run(self, host, port, device, engine=Engine.PADDLE_FLUID, service_id=0, device_id=0, **kwargs): - """ Args: host : str port : str device : BaiduAI.EasyEdge.Device,比如:Device.CPU engine : BaiduAI.EasyEdge.Engine, 比如: Engine.PADDLE_FLUID """ - self.run_serving_with_flask(host, port, device, engine, service_id, device_id, **kwargs) -``` - -## 2. 请求http服务 - -> 开发者可以打开浏览器,`http://{设备ip}:24401`,选择图片来进行测试。 - -### 2.1 http 请求方式:不使用图片base64格式 - -URL中的get参数: - -| 参数 | 说明 | 默认值 | -| --------- | --------- | ---------------- | -| threshold | 阈值过滤, 0~1 | 如不提供,则会使用模型的推荐阈值 | - -HTTP POST Body即为图片的二进制内容。 - -Python请求示例 - -```python -import requests - -with open('./1.jpg', 'rb') as f: - img = f.read() - result = requests.post( - 'http://127.0.0.1:24401/', - params={'threshold': 0.1}, - data=img).json() -``` - -## 3. http返回数据 - -| 字段 | 类型说明 | 其他 | -| ---------- | ------ | ------------------------------------ | -| error_code | Number | 0为成功,非0参考message获得具体错误信息 | -| results | Array | 内容为具体的识别结果。其中字段的具体含义请参考`预测图像-返回格式`一节 | -| cost_ms | Number | 预测耗时ms,不含网络交互时间 | - -返回示例 - -```json -{ - "cost_ms": 52, - "error_code": 0, - "results": [ - { - "confidence": 0.94482421875, - "index": 1, - "label": "IronMan", - "x1": 0.059185408055782318, - "x2": 0.18795496225357056, - "y1": 0.14762254059314728, - "y2": 0.52510076761245728, - "mask": "...", // 图像分割模型字段 - "trackId": 0, // 目标追踪模型字段 - }, - - ] -} -``` - -***关于矩形坐标*** - -x1 * 图片宽度 = 检测框的左上角的横坐标 - -y1 * 图片高度 = 检测框的左上角的纵坐标 - -x2 * 图片宽度 = 检测框的右下角的横坐标 - -y2 * 图片高度 = 检测框的右下角的纵坐标 - -***关于分割模型*** - -其中,mask为分割模型的游程编码,解析方式可参考 [demo](https://github.com/Baidu-AIP/EasyDL-Segmentation-Demo)。 - -**FAQ** - -1. 
执行infer_demo文件时,提示your generated code is out of date and must be regenerated with protoc >= 3.19.0
-
-进入当前项目,首先卸载protobuf
-
-```shell
-python3 -m pip uninstall protobuf
-```
-
-安装低版本protobuf
-
-```shell
-python3 -m pip install protobuf==3.19.0
-```
diff --git a/docs/compile/README.md b/docs/compile/README.md
new file mode 100644
index 0000000000..909ac893ca
--- /dev/null
+++ b/docs/compile/README.md
@@ -0,0 +1,17 @@
+# FastDeploy编译
+
+本文档说明C++预测库、Python预测库两种编译过程,请根据编译所在的平台参考如下文档
+
+- [Linux & Mac 编译](linux_and_mac.md)
+- [Windows编译](windows.md)
+
+编译过程中,各平台上可用的编译选项如下表所示
+
+| 选项 | 作用 | 备注 |
+|:---- | :--- | :--- |
+| ENABLE_ORT_BACKEND | 启用ONNXRuntime推理后端,默认ON | - |
+| WITH_GPU | 是否开启GPU使用,默认OFF | 当设为TRUE时,须通过CUDA_DIRECTORY指定cuda目录,如/usr/local/cuda; Mac上不支持设为ON |
+| ENABLE_TRT_BACKEND | 启用TensorRT推理后端,默认OFF | 当设为TRUE时,需通过TRT_DIRECTORY指定tensorrt目录,如/usr/downloads/TensorRT-8.4.0.1; Mac上不支持设为ON|
+| ENABLE_VISION | 编译集成视觉模型模块,包括OpenCV的编译集成,默认OFF | - |
+| ENABLE_PADDLE_FRONTEND | 编译集成Paddle2ONNX,默认ON | - |
+| ENABLE_DEBUG | 当为ON时,支持输出DEBUG信息,但可能会有性能损耗,默认OFF | - |
diff --git a/docs/compile/linux_and_mac.md b/docs/compile/linux_and_mac.md
new file mode 100644
index 0000000000..93e2db580e
--- /dev/null
+++ b/docs/compile/linux_and_mac.md
@@ -0,0 +1,32 @@
+# Linux & Mac编译
+
+## 编译C++
+```
+git clone https://gitee.com/jiangjiajun/FastDeploy.git
+cd FastDeploy
+git submodule init
+git submodule update
+mkdir build && cd build
+cmake .. -DENABLE_ORT_BACKEND=ON \
+         -DENABLE_VISION=ON \
+         -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy-0.0.3
+make -j8
+make install
+```
+编译后的预测库即在当前目录下的`fastdeploy-0.0.3`
+
+## 编译Python安装包
+```
+git clone https://gitee.com/jiangjiajun/FastDeploy.git
+cd FastDeploy
+git submodule init
+git submodule update
+# Python通过export环境变量设置编译选项
+export ENABLE_ORT_BACKEND=ON
+export ENABLE_VISION=ON
+python setup.py build
+python setup.py bdist_wheel
+```
+编译后的wheel包即在当前目录下的`dist`目录中
+
+编译选项说明参考[编译指南](./README.md)
diff --git a/docs/compile/windows.md b/docs/compile/windows.md
new file mode 100644
index 0000000000..78a9209cdf
--- /dev/null
+++ b/docs/compile/windows.md
@@ -0,0 +1,3 @@
+# Windows编译
+
+还没写
diff --git a/docs/cpp/README.md b/docs/cpp/README.md
new file mode 100644
index 0000000000..72524b214e
--- /dev/null
+++ b/docs/cpp/README.md
@@ -0,0 +1,110 @@
+# C++部署
+
+## 准备预测库
+
+参考编译文档[FastDeploy编译](../compile/README.md)进行编译,或直接使用如下预编译库
+
+| 编译库 | 平台 | 支持设备 | 说明 |
+|:------ | :---- | :------- | :----- |
+|[fastdeploy-linux-x64-0.0.3.tgz](https://bj.bcebos.com/paddle2onnx/fastdeploy/fastdeploy-linux-x64-0.0.3.tgz) | Linux | CPU | 集成ONNXRuntime |
+|[fastdeploy-linux-x64-gpu-0.0.3.tgz](https://bj.bcebos.com/paddle2onnx/fastdeploy/fastdeploy-linux-x64-gpu-0.0.3.tgz) | Linux | CPU/GPU | 集成ONNXRuntime, TensorRT |
+|[fastdeploy-osx-x86_64-0.0.3.tgz](https://bj.bcebos.com/paddle2onnx/fastdeploy/fastdeploy-osx-x86_64-0.0.3.tgz) | Mac OSX Intel CPU | CPU | 集成ONNXRuntime |
+|[fastdeploy-osx-arm64-0.0.3.tgz](https://bj.bcebos.com/paddle2onnx/fastdeploy/fastdeploy-osx-arm64-0.0.3.tgz) | Mac OSX M1 CPU | CPU | 集成ONNXRuntime |
+
+
+## 使用
+
+FastDeploy提供了多种领域内的模型,可快速完成模型的部署,本文档以YOLOv5在Linux上的部署为例
+
+```
+# 下载库并解压
+wget https://bj.bcebos.com/paddle2onnx/fastdeploy/fastdeploy-linux-x64-0.0.3.tgz
+tar xvf fastdeploy-linux-x64-0.0.3.tgz
+
+# 下载模型和测试图片
+wget https://github.com/ultralytics/yolov5/releases/download/v6.0/yolov5s.onnx
+wget https://raw.githubusercontent.com/ultralytics/yolov5/master/data/images/bus.jpg
+```
+
+### YOLOv5预测代码
+
+准备如下`yolov5.cc`代码
+```
+#include "fastdeploy/vision.h"
+
+int main() {
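+  // 整体流程:加载模型 -> 读入图片 -> 执行预测 -> 输出检测结果
+  // 其中yolov5s.onnx即前文下载的ONNX格式模型文件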
+  namespace vis = fastdeploy::vision;
+
+  auto model = vis::ultralytics::YOLOv5("yolov5s.onnx"); // 加载模型
+
+  if (!model.Initialized()) { // 判断模型是否初始化成功
+    std::cerr << "Initialize failed." << std::endl;
+    return -1;
+  }
+
+  cv::Mat im = cv::imread("bus.jpg"); // 读入图片
+
+  vis::DetectionResult res;
+  if (!model.Predict(&im, &res)) { // 预测图片
+    std::cerr << "Prediction failed." << std::endl;
+    return -1;
+  }
+
+  std::cout << res.Str() << std::endl; // 输出检测结果
+  return 0;
+}
+```
+
+### 编译代码
+
+编译前先完成CMakeLists.txt的开发,在`yolov5.cc`同级目录创建`CMakeLists.txt`文件,内容如下
+```
+PROJECT(yolov5_demo C CXX)
+CMAKE_MINIMUM_REQUIRED (VERSION 3.16)
+# 在低版本ABI环境中,可通过如下代码进行兼容性编译
+# add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)
+
+# 在下面指定下载解压后的fastdeploy库路径
+set(FASTDEPLOY_INSTALL_DIR /ssd1/download/fastdeploy-linux-x64-0.0.3/)
+
+include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
+
+# 添加FastDeploy依赖头文件
+include_directories(${FASTDEPLOY_INCS})
+
+add_executable(yolov5_demo ${PROJECT_SOURCE_DIR}/yolov5.cc)
+message(${FASTDEPLOY_LIBS})
+# 添加FastDeploy库依赖
+target_link_libraries(yolov5_demo ${FASTDEPLOY_LIBS})
+```
+
+此时当前目录结构如下所示
+```
+- demo_directory
+|___fastdeploy-linux-x64-0.0.3/ # 预测库解压
+|___yolov5.cc # 示例代码
+|___CMakeLists.txt # cmake文件
+|___yolov5s.onnx # 模型文件
+|___bus.jpg # 测试图片
+```
+
+执行如下命令进行编译
+```
+cmake .
+make -j
+```
+编译后可执行二进制即为当前目录下的`yolov5_demo`,使用如下命令执行
+```
+./yolov5_demo
+```
+
+即会加载模型进行推理,得到结果如下
+```
+DetectionResult: [xmin, ymin, xmax, ymax, score, label_id]
+223.395126,403.948669, 345.337189, 867.339050, 0.856906, 0
+668.301758,400.781372, 808.441772, 882.534973, 0.829716, 0
+50.210758,398.571289, 243.123383, 905.016846, 0.805375, 0
+23.768217,214.979355, 802.627869, 778.840820, 0.756311, 5
+0.737200,552.281006, 78.617218, 890.945007, 0.363471, 0
+```
diff --git a/docs/iOS-SDK.md b/docs/iOS-SDK.md
deleted file mode 100644
index 5a1fa0ffca..0000000000
--- a/docs/iOS-SDK.md
+++ /dev/null
@@ -1,212 +0,0 @@
-# 简介
-
-本文档介绍FastDeploy中的模型SDK,在iOS环境下:(1)推理部署步骤;(2)介绍SDK使用说明,方便开发者了解项目后二次开发。
-
-
-
-* [简介](#简介)
-
-* [系统支持说明](#系统支持说明)
-
-  * [1. 系统支持说明](#1-系统支持说明)
-  * [2. SDK大小说明](#2-sdk大小说明)
-
-* [快速开始](#快速开始)
-
-  * [1. 项目结构说明](#1-项目结构说明)
-  * [2. 测试Demo](#2-测试demo)
-
-* [SDK使用说明](#sdk使用说明)
-
-  * [1. 集成指南](#1-集成指南)
-    * [1.1 依赖库集成](#11-依赖库集成)
-  * [2. 调用流程示例](#2-调用流程示例)
-    * [2.1 初始化](#21-初始化)
-    * [2.2 预测图像](#22-预测图像)
-
-* [FAQ](#faq)
-
-
-
-# 系统支持说明
-
-## 1. 系统支持说明
-
-1. 系统支持:iOS 9.0及以上。
-
-2. 硬件支持:支持 arm64 (Standard architectures),暂不支持模拟器。
-
-  * 官方验证过的手机机型:大部分ARM 架构的手机、平板及开发板。
-
-3. 其他说明
-
-  * 3.1 【图像分割类模型】(1)图像分割类Demo暂未提供实时摄像头录制拍摄的能力,开发者可根据自己需要自行开发完成;(2)PP-Humanseg-Lite模型设计初衷为横屏视频会议等场景,本Demo仅支持竖屏场景,开发者可根据自己需要,开发横屏的功能。
-
-  * 3.2 【OCR模型】OCR任务第一次启动时,第一张图片的推理时间较久,属于正常情况(因为涉及到模型加载、预处理等工作)。
- -## 2. SDK大小说明 - -1. 模型资源文件大小影响 SDK 大小 -2. SDK 包及 IPA 安装包虽然比较大,但最终安装到设备后所占大小会缩小很多。这与 multi architechtures、bitcode 和 AppStore 的优化有关。 - -# 快速开始 - -## 1. 项目结构说明 - -根据开发者模型、部署芯片、操作系统需要,在图像界面[飞桨开源模型](https://ai.baidu.com/easyedge/app/openSource)或[GIthub](https://github.com/PaddlePaddle/FastDeploy)中选择对应的SDK进行下载。SDK目录结构如下: - -``` -.EasyEdge-iOS-SDK -├── EasyDLDemo # Demo工程文件 -├── LIB # 依赖库 -├── RES -│ ├── easyedge      # 模型资源文件夹,一套模型适配不同硬件、OS和部署方式 -│ ├── conf.json        # Android、iOS系统APP名字需要 -│ ├── model # 模型结构文件 -│ ├── params # 模型参数文件 -│ ├── label_list.txt # 模型标签文件 -│ ├── infer_cfg.json # 模型前后处理等配置文件 -└── DOC # 文档 -``` - -## 2. 测试Demo - -按如下步骤可直接运行 SDK 体验 Demo: -步骤一:用 Xcode 打开 `EasyDLDemo/EasyDLDemo.xcodeproj` -步骤二:配置开发者自己的签名(不了解签名机制的,可以看FAQ [iOS签名介绍](#100))
-步骤三:连接手机运行,不支持模拟器 - -检测模型运行示例: - -
- -# SDK使用说明 - -本节介绍如何将 SDK 接入开发者的项目中使用。 - -## 1. 集成指南 - -步骤一:依赖库集成 -步骤二:`import ` - -### 1.1 依赖库集成 - -1. 复制 LIB 目录至项目合适的位置 -2. 配置 Build Settings 中 Search paths: 以 SDK 中 LIB 目录路径为例 -- Framework Search Paths:`${PROJECT_DIR}/../LIB/lib` -- Header Search Paths:`${PROJECT_DIR}/../LIB/include` -- Library Search Paths:`${PROJECT_DIR}/../LIB/lib` - -> 集成过程如出现错误,请参考 Demo 工程对依赖库的引用 - -## 2. 调用流程示例 - -以通用ARM的图像分类预测流程为例,详细说明请参考后续章节: - -``` -NSError *err; - -// step 1: 初始化模型 -EasyDLModel *model = [[EasyDLModel alloc] initModelFromResourceDirectory:@"easyedge" withError:&err]; - -// step 2: 准备待预测的图像 -UIImage *image = ...; - -// step 3: 预测图像 -NSArray *results = [model detectUIImage:image withFilterScore:0 andError:&err]; - -// step 4: 解析结果 -for (id res in results) { - EasyDLClassfiData *clsData = (EasyDLClassfiData *) res; - NSLog(@"labelIndex=%d, labelName=%@, confidence=%f", clsData.category, clsData.label, clsData.accuracy); -} -``` - -### 2.1 初始化 - -``` -// 示例 -// 参数一为模型资源文件夹名称 -EasyDLModel *model = [[EasyDLModel alloc] initModelFromResourceDirectory:@"easyedge" withError:&err]; -``` - -> 模型资源文件夹需以 folder reference 方式加入 Xcode 工程,如 `RES/easyedge` 文件夹在 Demo 工程中表现为蓝色 - -### 2.2 预测图像 - -所有模型类型通过以下接口获取预测结果: - -``` -// 返回的数组类型不定 -NSArray *results = [model detectUIImage:image withFilterScore:0 andError:&err]; -``` - -返回的数组类型如下,具体可参考 `EasyDLResultData.h` 中的定义: -| 模型类型 | 类型 | -| --- | ---- | -| 图像分类 | EasyDLClassfiData | -| 物体检测/人脸检测 | EasyDLObjectDetectionData | -| 实例分割 | EasyDLObjSegmentationData | -| 姿态估计 | EasyDLPoseData | -| 文字识别 | EasyDLOcrData | - -# FAQ - -1. 如何多线程并发预测? - -SDK内部已经能充分利用多核的计算能力。不建议使用并发来预测。 - -如果开发者想并发使用,请务必注意`EasyDLModel`所有的方法都不是线程安全的。请初始化多个实例进行并发使用,如 - -```c -- (void)testMultiThread { - UIImage *img = [UIImage imageNamed:@"1.jpeg"]; - NSError *err; - EasyDLModel * model1 = [[EasyDLModel alloc] initModelFromResourceDirectory:@"easyedge" withError:&err]; - EasyDLModel * model2 = [[EasyDLModel alloc] initModelFromResourceDirectory:@"easyedge" withError:&err]; - - dispatch_queue_t queue1 = dispatch_queue_create("testQueue", DISPATCH_QUEUE_CONCURRENT); - dispatch_queue_t queue2 = dispatch_queue_create("testQueue2", DISPATCH_QUEUE_CONCURRENT); - - dispatch_async(queue1, ^{ - NSError *detectErr; - for(int i = 0; i < 1000; ++i) { - NSArray * res = [model1 detectUIImage:img withFilterScore:0 andError:&detectErr]; - NSLog(@"1: %@", res[0]); - } - }); - - dispatch_async(queue2, ^{ - NSError *detectErr; - for(int i = 0; i < 1000; ++i) { - NSArray * res = [model2 detectUIImage:img withFilterScore:0 andError:&detectErr]; - NSLog(@"2: %@", res[0]); - } - }); -} -``` - -2. 编译时出现 Undefined symbols for architecture arm64: ... -* 出现 `cxx11, vtable` 字样:请引入 `libc++.tbd` -* 出现 `cv::Mat` 字样:请引入 `opencv2.framework` -* 出现 `CoreML`, `VNRequest` 字样:请引入`CoreML.framework` 并务必`#import ` -3. 运行时报错 Image not found: xxx ... - -请Embed具体报错的库。 - -4. 编译时报错:Invalid bitcode version - -这个可能是开发者使用的 Xcode 低于12导致,可以升级至12版本。 - -5. 错误说明 - -SDK 的方法会返回 NSError,直接返回的 NSError 的错误码定义在 `EasyDLDefine.h - EEasyDLErrorCode` 中。NSError 附带 message (有时候会附带 NSUnderlyingError),开发者可根据 code 和 message 进行错误判断和处理。 - -6. iOS签名说明 - -iOS 签名是苹果生态对 APP 开发者做的限定,对于个人开发者是免费的,对于企业开发者(譬如APP要上架应用市场),是收费的。此处,仅简单说明作为普通开发者,第一次尝试使用 Xcode编译代码,需要进行的签名操作。
-(1)在Xcode/Preferences/Accounts 中添加个人Apple ID;
-(2)在对应的EasyDLDemo中做如下图设置:
- -
-(3)(2)后会在手机上安装好对应APP,还需要在手机上`设置/通用/设备管理/开发者应用/信任appleID`,才能运行该 APP。 diff --git a/docs/tech/design.md b/docs/tech/design.md new file mode 100644 index 0000000000..ec85692266 --- /dev/null +++ b/docs/tech/design.md @@ -0,0 +1,24 @@ +# FastDeploy + +FastDeploy分为`Runtime`和`应用`模块。 + +## Runtime +`Runtime`对应于不同硬件上的不同后端,大部分情况下,一种硬件对应于一种后端,但对于CPU、GPU, 存在多种后端,用户可根据自己的需求进行选择。 + +| Runtime | 后端 | +| :------ | :---- | +| CPU(x86_64) | `fastdeploy::Backend::ORT` | +| GPU(Nvidia) | `fastdeploy::Backend::ORT` / `fastdeploy::Backend::TRT` | + +具体文档参考 [Runtime文档](runtime.md) + + +## 应用 + +应用是基于`Runtime`提供的上层模型推理,集成了模型端到端的推理功能 + +- Vision +- Text +- Audio + +具体文档参考 [Vision文档](vision.md) diff --git a/docs/tech/models.md b/docs/tech/models.md new file mode 100644 index 0000000000..07b57c8b1c --- /dev/null +++ b/docs/tech/models.md @@ -0,0 +1,63 @@ +# 模型开发 + +以`ultralytics/yolov5`为例,在`fastdeploy/vision`目录下新增`ultralytics`目录,并创建代码`yolov5.h` + +定义`YOLOv5`类 + +``` +class YOLOv5 : public FastDeployModel { + public: + // 构造函数指定模型路径,并默认为ONNX格式 + YOLOv5(const std::string& model_file) + : FastDeployModel(model_file, "", Frontend::ONNX) { + size = {640, 640}; // 图像预处理resize大小 + // 图像填充值 + padding_value = {114.0, 114.0, 114.0}; + // 是否只填充到满足stride的最小方框即可 + bool is_mini_pad = false; + // 是否支持图像resize超过原图尺寸 + bool is_scale_up = true; + // 步长,padding到长宽为stride的倍数 + stride = 32; + + // 通过下面的两个参数,来说明模型在CPU/GPU上支持的后端种类 + // 指定Device后,默认情况下,会优先选择最前的后端 + valid_cpu_backends = {Backend::ORT}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } + + std::string ModelName() const; // 返回模型名 + + // 模型初始化, 须在此函数中主动调用基类的`InitBackend()`函数 + // 来初始化runtime + // 一些模型前后处理的初始化也可在此函数中,如ppdet/ppcls创建一个 + // 数据预处理pipeline + bool Init(); + + // 预处理,其中输入是vision::Mat结构,输出是FDTensor + // 输出提供给runtime进行推理使用 + bool Preprocess(Mat* mat, FDTensor* output); + + // 后处理,输入是runtime的输入FDTensor + // 一些跟模型相关的预处理参数 + bool Postprocess(FDTensor& tensor, DetectionResult* res, float conf_thresh, float nms_iou_thresh); + + // 端到端的推理函数,包含前后处理 + // 因此一般也建议将后处理的部分参数放在这个接口中 + bool Predict(cv::Mat* im, DetectionResult* result, float conf_thresh = 0.25, float nms_iou_thresh = 0.5); +}; +``` + +模型的实现上,并没有特别强的规范约束,但是 +- 1. 一定要继承`FastDeployModel` +- 2. 确定可用的`valid_cpu_backends`和`valid_gpu_backends` +- 3. 要实现`Init()`/`ModelName()`/`Predict()`三个接口 +- 4. 建议统一为`Preprocess`和`Postprocess`两个接口作为前后处理所用 + + +## 其它 + +在`vision`中,会提供几类基础的数据结构使用,包括`vision::ClassifyResult`、`vision::DetectionResult`、`vision::SegmentationResult`等作为模型常见的输出结构。 但难免会遇到新的输出结构不在这几类中,对于一定要定制化的数据结构,默认按照下面方式处理 + +- 1. 如果是大量模型通用的结构,仍然实现在`vision/common.h`中,作为通用的输出结构 +- 2. 
如果只是某个模型需要,则实现在如`vision/ultralytics/yolov5.h`中,同时需要自行为此结构体进行pybind封装
diff --git a/docs/tech/runtime.md b/docs/tech/runtime.md
new file mode 100644
index 0000000000..946b4a0246
--- /dev/null
+++ b/docs/tech/runtime.md
@@ -0,0 +1,135 @@
+# fastdeploy::Runtime
+
+## FDTensor Runtime的输入输出数据结构
+
+```
+struct FDTensor {
+  std::vector<int64_t> shape;   // 形状
+  std::string name;             // 命名
+  FDDataType dtype;             // 数据类型
+  Device device = Device::CPU;  // 数据存放设备
+
+  void* MutableData();  // 获取tensor内存buffer指针
+
+  // 获取tensor数据,如若tensor数据在其它设备
+  // 此函数会先将数据拷贝至CPU,再返回指向
+  // CPU内存buffer的指针
+  void* Data();
+
+  // 初始化Tensor,并复用外部数据指针
+  // Tensor的内存buffer将由外部的调用者来创建或释放
+  void SetExternalData(const std::vector<int64_t>& new_shape,
+                       const FDDataType& data_type,
+                       void* data_buffer,
+                       const Device& dev);
+
+  int Nbytes() const;  // 返回tensor数据字节大小
+
+  int Numel() const;   // 返回tensor元素个数
+
+  // Debug函数,打印tensor的信息,包含mean、max、min等
+  void PrintInfo(const std::string& prefix = "TensorInfo");
+};
+```
+
+FDTensor是前后处理与`Runtime`进行对接的数据结构,大多情况下建议通过`SetExternalData`来共享用户传入的数据,减小内存拷贝带来的开销。
+
+## Runtime 多后端推理引擎
+
+### RuntimeOption 引擎配置
+```
+struct RuntimeOption {
+  // 模型文件和权重文件
+  std::string model_file;
+  std::string params_file;
+  // 模型格式,当前可支持Frontend::PADDLE / Frontend::ONNX
+  Frontend model_format = Frontend::PADDLE;
+  Backend backend = Backend::ORT;
+
+  // CPU上运行时的线程数
+  int cpu_thread_num = 8;
+
+  // 推理硬件,当前支持Device::CPU / Device::GPU
+  // 在CPU/GPU上需与backend进行搭配选择
+  Device device;
+
+  // Backend::ORT的参数
+  int ort_graph_opt_level;
+  int ort_inter_op_num_threads;
+  int ort_execution_mode;
+
+  // Backend::TRT的参数
+  std::map<std::string, std::vector<int32_t>> trt_fixed_shape;
+  std::map<std::string, std::vector<int32_t>> trt_max_shape;
+  std::map<std::string, std::vector<int32_t>> trt_min_shape;
+  std::map<std::string, std::vector<int32_t>> trt_opt_shape;
+  std::string trt_serialize_file = "";
+  bool trt_enable_fp16 = false;
+  bool trt_enable_int8 = false;
+  size_t trt_max_batch_size = 32;
+};
+```
+
+
+### Runtime 引擎
+
+```
+struct Runtime {
+  // 加载模型,引擎初始化
+  bool Init(const RuntimeOption& _option);
+
+  // 进行推理
+  // 其中输入须正确配置tensor中的name
+  bool Infer(std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs);
+
+  int NumInputs();   // 输入个数
+  int NumOutputs();  // 输出个数
+
+  TensorInfo GetInputInfo(int index);   // 获取输入信息,包括shape, dtype, name
+  TensorInfo GetOutputInfo(int index);  // 获取输出信息,包括shape, dtype, name
+
+  RuntimeOption option;  // 引擎的配置信息
+};
+```
+
+
+## Runtime使用示例
+
+### C++
+
+```
+#include "fastdeploy/fastdeploy_runtime.h"
+
+int main() {
+  auto option = fastdeploy::RuntimeOption();
+  option.model_file = "resnet50/inference.pdmodel";
+  option.params_file = "resnet50/inference.pdiparams";
+
+  auto runtime = fastdeploy::Runtime();
+  assert(runtime.Init(option));
+
+  // 需准备好输入tensor
+  std::vector<fastdeploy::FDTensor> inputs;
+
+  std::vector<fastdeploy::FDTensor> outputs;
+  assert(runtime.Infer(inputs, &outputs));
+
+  // 输出tensor的debug信息查看
+  outputs[0].PrintInfo();
+}
+```
+
+### Python
+
+```
+import fastdeploy as fd
+import numpy as np
+
+option = fd.RuntimeOption()
+option.model_file = "resnet50/inference.pdmodel"
+option.params_file = "resnet50/inference.pdiparams"
+
+runtime = fd.Runtime(option)
+
+result = runtime.infer({"image": np.random.rand(1, 3, 224, 224)})
+```
diff --git a/docs/tech/vision.md b/docs/tech/vision.md
new file mode 100644
index 0000000000..90f4c43fa9
--- /dev/null
+++ b/docs/tech/vision.md
@@ -0,0 +1,74 @@
+# Vision
+
+Vision是FastDeploy中的视觉模型模块,包含`processors`和`utils`两个公共模块,以及模型模块。
+
+## processors 图像处理模块
+
+`processors`提供了常见的图像处理操作,并为各操作实现不同的后端,如当前支持CPU以及GPU两种处理方式。在模型预处理中,开发者调用`processors`提供的API,即可快速在不同的处理后端间切换。
+
+默认在CPU上进行处理
+```
+namespace vis = fastdeploy::vision;
+
+cv::Mat im = cv::imread("test.jpg");
+
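+// 用cv::Mat构造vis::Mat,后续的Resize/Normalize/HWC2CHW等算子均直接作用在该结构上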
+vis::Mat mat(im);
+assert(vis::Resize::Run(&mat, 224, 224));
+assert(vis::Normalize::Run(&mat, {0.5, 0.5, 0.5}, {0.5, 0.5, 0.5}));
+assert(vis::HWC2CHW::Run(&mat));
+```
+
+切换为CUDA GPU进行处理
+```
+namespace vis = fastdeploy::vision;
+vis::Processor::default_lib = vis::ProcessorLib::OPENCV_CUDA;
+
+cv::Mat im = cv::imread("test.jpg");
+
+vis::Mat mat(im);
+assert(vis::Resize::Run(&mat, 224, 224));
+assert(vis::Normalize::Run(&mat, {0.5, 0.5, 0.5}, {0.5, 0.5, 0.5}));
+assert(vis::HWC2CHW::Run(&mat));
+```
+
+在处理过程中,通过`fastdeploy::vision::Mat`作为传递的数据结构
+```
+struct Mat {
+  Mat(cv::Mat);       // 通过`cv::Mat`进行构造
+  FDDataType Type();  // 数值类型
+  int Channels();     // 通道数
+  int Width();        // 宽
+  int Height();       // 高
+
+  // 获取图像,如Mat在GPU上,则会拷贝到CPU上再返回
+  cv::Mat GetCpuMat();
+
+  // 获取图像,如Mat在CPU上,则会拷贝到GPU上再返回
+  cv::cuda::GpuMat GetGpuMat();
+
+  void ShareWithTensor(FDTensor* tensor);  // 构造一个FDTensor,并共享内存
+  bool CopyToTensor(FDTensor* tensor);     // 构造一个CPU上的FDTensor,并将数据拷贝过去
+
+  Layout layout;  // 数据排布,支持Layout::HWC / Layout::CHW
+  Device device;  // 数据存放设备,支持Device::CPU / Device::GPU
+};
+```
+
+## utilities 工具模块
+
+提供一些常见的函数,如分类模型常用的`TopK`选择,检测模型的`NMS`操作。后续也可以考虑为后处理的实现提供不同的后端。
+
+
+## visualize 可视化模块
+
+提供一些可视化函数,检测、分割、OCR等任务都需要这类函数来查看可视化效果
+
+## 模型模块
+
+这是`Vision`中最重要的模块,所有的模型均通过`域名` + `模型名`来划分,如
+
+- vision::ppdet::YOLOv3 // PaddleDetection的YOLOv3模型
+- vision::ppdet::RCNN // PaddleDetection的RCNN类模型
+- vision::ultralytics::YOLOv5 // https://github.com/ultralytics/yolov5 YOLOv5模型
+
+模型的增加参考[模型开发](models.md)
diff --git a/docs/usage/model.md b/docs/usage/model.md
new file mode 100644
index 0000000000..d5326b3c11
--- /dev/null
+++ b/docs/usage/model.md
@@ -0,0 +1,57 @@
+# FastDeploy模型
+
+目前支持的模型如下
+- [fastdeploy.vision.ppcls.Model](vision/ppcls.md) PaddleClas里的所有分类模型
+- [fastdeploy.vision.ultralytics.YOLOv5](vision/ultralytics.md) [ultralytics/yolov5](https://github.com/ultralytics/yolov5)模型
+
+具体模型使用方式可参考各模型文档的API和示例说明。各模型在运行时均有默认的Runtime配置,本文档说明如何修改模型的后端配置,如下代码为运行YOLOv5模型的Python示例代码
+```
+import fastdeploy as fd
+model = fd.vision.ultralytics.YOLOv5("yolov5s.onnx")
+
+import cv2
+im = cv2.imread('bus.jpg')
+
+result = model.predict(im)
+
+print(model.runtime_option)
+```
+通过`print(model.runtime_option)`可以看到如下信息
+```
+RuntimeOption(
+  backend : Backend.ORT # 当前推理后端为ONNXRuntime
+  cpu_thread_num : 8 # 推理时CPU线程数设置(仅当模型在CPU上推理时有效)
+  device : Device.GPU # 当前推理设备为GPU
+  device_id : 0 # 当前推理设备id为0
+  model_file : yolov5s.onnx # 模型文件路径
+  model_format : Frontend.ONNX # 模型格式,当前为ONNX格式
+  ort_execution_mode : -1 # ONNXRuntime后端的配置参数,-1表示默认
+  ort_graph_opt_level : -1 # ONNXRuntime后端的配置参数, -1表示默认
+  ort_inter_op_num_threads : -1 # ONNXRuntime后端的配置参数,-1表示默认
+  params_file :  # 参数文件(ONNX模型无此文件)
+  trt_enable_fp16 : False # TensorRT参数
+  trt_enable_int8 : False # TensorRT参数
+  trt_fixed_shape : {} # TensorRT参数
+  trt_max_batch_size : 32 # TensorRT参数
+  trt_max_shape : {} # TensorRT参数
+  trt_max_workspace_size : 1073741824 # TensorRT参数
+  trt_min_shape : {} # TensorRT参数
+  trt_opt_shape : {} # TensorRT参数
+  trt_serialize_file :  # TensorRT参数
+)
+```
+
+会注意到参数名以`ort`开头的,均为ONNXRuntime后端专有的参数;以`trt`开头的,则为TensorRT后端专有的参数。各后端与参数的配置,可参考[RuntimeOption](runtime_option.md)说明。
+
+## 切换模型推理方式
+
+一般而言,用户只需关注推理是在哪种Device下即可。当然若有更进一步的需求,也可以再为Device选择不同的Backend,但配置时需注意Device与Backend的搭配:如Backend::TRT只支持Device为GPU,而Backend::ORT则同时支持CPU和GPU
+
+```
+import fastdeploy as fd
+option = fd.RuntimeOption()
+option.device = fd.Device.CPU
+option.cpu_thread_num = 12
+model = fd.vision.ultralytics.YOLOv5("yolov5s.onnx", option)
+print(model.runtime_option)
+```
diff --git a/docs/usage/vision/ppcls.md b/docs/usage/vision/ppcls.md
new file mode 100644
index 0000000000..fc795fae5c
--- /dev/null
+++ b/docs/usage/vision/ppcls.md
@@ -0,0 +1,104 @@
+# PaddleClas分类模型推理
+
+PaddleClas模型导出参考[PaddleClas](https://github.com/PaddlePaddle/PaddleClas.git)
+
+## Python API说明
+
+### Model类
+```
+fastdeploy.vision.ppcls.Model(model_file, params_file, config_file, runtime_option=None, model_format=fastdeploy.Frontend.PADDLE)
+```
+
+**参数**
+
+> * **model_file**(str): 模型文件,如resnet50/inference.pdmodel
+> * **params_file**(str): 参数文件,如resnet50/inference.pdiparams
+> * **config_file**(str): 配置文件,来源于PaddleClas提供的推理配置文件,如[inference_cls.yaml](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.3/deploy/configs/inference_cls.yaml)
+> * **runtime_option**(fd.RuntimeOption): 后端推理的配置, 默认为None,即采用默认配置
+> * **model_format**(fd.Frontend): 模型格式说明,PaddleClas的模型格式均为Frontend.PADDLE
+
+#### predict接口
+```
+Model.predict(image_data, topk=1)
+```
+
+> **参数**
+>
+> > * **image_data**(np.ndarray): 输入数据, 注意需为HWC,RGB格式
+> > * **topk**(int): 取前top的分类
+
+> **返回结果**
+>
+> > * **result**(ClassifyResult):结构体包含`label_ids`和`scores`两个list成员变量,表示类别,和各类别对应的置信度
+
+### 示例
+
+> ```
+> import fastdeploy.vision as vis
+> import cv2
+> model = vis.ppcls.Model("resnet50/inference.pdmodel", "resnet50/inference.pdiparams", "resnet50/inference_cls.yaml")
+> im = cv2.imread("test.jpeg")
+> result = model.predict(im, topk=5)
+> print(result.label_ids[0], result.scores[0])
+> ```
+
+## C++ API说明
+
+需添加头文件`#include "fastdeploy/vision.h"`
+
+### Model类
+
+```
+fastdeploy::vision::ppcls::Model(
+    const std::string& model_file,
+    const std::string& params_file,
+    const std::string& config_file,
+    const RuntimeOption& custom_option = RuntimeOption(),
+    const Frontend& model_format = Frontend::PADDLE)
+```
+
+**参数**
+> * **model_file**: 模型文件,如resnet50/inference.pdmodel
+> * **params_file**: 参数文件,如resnet50/inference.pdiparams
+> * **config_file**: 配置文件,来源于PaddleClas提供的推理配置文件,如[inference_cls.yaml](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.3/deploy/configs/inference_cls.yaml)
+> * **runtime_option**: 后端推理的配置, 不设置的情况下,采用默认配置
+> * **model_format**: 模型格式说明,PaddleClas的模型格式均为Frontend.PADDLE
+
+#### Predict接口
+```
+bool Model::Predict(cv::Mat* im, ClassifyResult* result, int topk = 1)
+```
+
+> **参数**
+> > * **im**: 输入图像数据,须为HWC,RGB格式(注意传入的im在预处理过程中会被修改)
+> > * **result**: 分类结果
+> > * **topk**: 取分类结果前topk
+
+> **返回结果**
+> > true或false,表示预测成功与否
+
+### 示例
+> ```
+> #include "fastdeploy/vision.h"
+>
+> int main() {
+>   namespace vis = fastdeploy::vision;
+>   auto model = vis::ppcls::Model("resnet50/inference.pdmodel", "resnet50/inference.pdiparams", "resnet50/inference_cls.yaml");
+>
+>   if (!model.Initialized()) {
+>     std::cerr << "Initialize failed." << std::endl;
+>     return -1;
+>   }
+>
+>   cv::Mat im = cv::imread("test.jpeg");
+>
+>   vis::ClassifyResult res;
+>   if (!model.Predict(&im, &res, 5)) {
+>     std::cerr << "Prediction failed." << std::endl;
+>     return -1;
+>   }
+>
+>   std::cout << res.label_ids[0] << " " << res.scores[0] << std::endl;
+>   return 0;
+> }
+> ```
diff --git a/external/onnxruntime.cmake b/external/onnxruntime.cmake
new file mode 100644
index 0000000000..3e50c804f2
--- /dev/null
+++ b/external/onnxruntime.cmake
@@ -0,0 +1,90 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +include(ExternalProject) + +set(ONNXRUNTIME_PROJECT "extern_onnxruntime") +set(ONNXRUNTIME_PREFIX_DIR ${THIRD_PARTY_PATH}/onnxruntime) +set(ONNXRUNTIME_SOURCE_DIR + ${THIRD_PARTY_PATH}/onnxruntime/src/${ONNXRUNTIME_PROJECT}) +set(ONNXRUNTIME_INSTALL_DIR ${THIRD_PARTY_PATH}/install/onnxruntime) +set(ONNXRUNTIME_INC_DIR + "${ONNXRUNTIME_INSTALL_DIR}/include" + CACHE PATH "onnxruntime include directory." FORCE) +set(ONNXRUNTIME_LIB_DIR + "${ONNXRUNTIME_INSTALL_DIR}/lib" + CACHE PATH "onnxruntime lib directory." FORCE) +set(CMAKE_BUILD_RPATH "${CMAKE_BUILD_RPATH}" "${ONNXRUNTIME_LIB_DIR}") + +set(ONNXRUNTIME_VERSION "1.11.1") +set(ONNXRUNTIME_URL_PREFIX "https://bj.bcebos.com/paddle2onnx/libs/") + +if(WIN32) + if(WITH_GPU) + set(ONNXRUNTIME_FILENAME "onnxruntime-win-x64-gpu-${ONNXRUNTIME_VERSION}.zip") + else() + set(ONNXRUNTIME_FILENAME "onnxruntime-win-x64-${ONNXRUNTIME_VERSION}.zip") + endif() +elseif(APPLE) + if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "arm64") + set(ONNXRUNTIME_FILENAME "onnxruntime-osx-arm64-${ONNXRUNTIME_VERSION}.tgz") + else() + set(ONNXRUNTIME_FILENAME "onnxruntime-osx-x86_64-${ONNXRUNTIME_VERSION}.tgz") + endif() +else() + if(WITH_GPU) + set(ONNXRUNTIME_FILENAME "onnxruntime-linux-x64-gpu-${ONNXRUNTIME_VERSION}.tgz") + else() + set(ONNXRUNTIME_FILENAME "onnxruntime-linux-x64-${ONNXRUNTIME_VERSION}.tgz") + endif() +endif() +set(ONNXRUNTIME_URL "${ONNXRUNTIME_URL_PREFIX}${ONNXRUNTIME_FILENAME}") + +include_directories(${ONNXRUNTIME_INC_DIR} +)# For ONNXRUNTIME code to include internal headers. + +if(WIN32) + set(ONNXRUNTIME_LIB + "${ONNXRUNTIME_INSTALL_DIR}/lib/onnxruntime.lib" + CACHE FILEPATH "ONNXRUNTIME static library." FORCE) +elseif(APPLE) + set(ONNXRUNTIME_LIB + "${ONNXRUNTIME_INSTALL_DIR}/lib/libonnxruntime.dylib" + CACHE FILEPATH "ONNXRUNTIME static library." FORCE) +else() + set(ONNXRUNTIME_LIB + "${ONNXRUNTIME_INSTALL_DIR}/lib/libonnxruntime.so" + CACHE FILEPATH "ONNXRUNTIME static library." FORCE) +endif() + +ExternalProject_Add( + ${ONNXRUNTIME_PROJECT} + ${EXTERNAL_PROJECT_LOG_ARGS} + URL ${ONNXRUNTIME_URL} + PREFIX ${ONNXRUNTIME_PREFIX_DIR} + DOWNLOAD_NO_PROGRESS 1 + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + UPDATE_COMMAND "" + INSTALL_COMMAND + ${CMAKE_COMMAND} -E remove_directory ${ONNXRUNTIME_INSTALL_DIR} && + ${CMAKE_COMMAND} -E make_directory ${ONNXRUNTIME_INSTALL_DIR} && + ${CMAKE_COMMAND} -E rename ${ONNXRUNTIME_SOURCE_DIR}/lib/ ${ONNXRUNTIME_INSTALL_DIR}/lib && + ${CMAKE_COMMAND} -E copy_directory ${ONNXRUNTIME_SOURCE_DIR}/include + ${ONNXRUNTIME_INC_DIR} + BUILD_BYPRODUCTS ${ONNXRUNTIME_LIB}) + +add_library(external_onnxruntime STATIC IMPORTED GLOBAL) +set_property(TARGET external_onnxruntime PROPERTY IMPORTED_LOCATION ${ONNXRUNTIME_LIB}) +add_dependencies(external_onnxruntime ${ONNXRUNTIME_PROJECT}) diff --git a/external/opencv.cmake b/external/opencv.cmake new file mode 100644 index 0000000000..8ce8c56cc7 --- /dev/null +++ b/external/opencv.cmake @@ -0,0 +1,121 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +if(WIN32) + find_package(OpenCV REQUIRED PATHS ${OpenCV_DIR}) + list(APPEND DEPEND_LIBS ${OpenCV_LIBS}) +else() + +include(ExternalProject) + +set(OPENCV_PROJECT "extern_opencv") +set(OPENCV_PREFIX_DIR ${THIRD_PARTY_PATH}/opencv) +set(OPENCV_SOURCE_DIR + ${THIRD_PARTY_PATH}/opencv/src/${OPENCV_PROJECT}) +set(OPENCV_INSTALL_DIR ${THIRD_PARTY_PATH}/install/opencv) +set(OPENCV_INC_DIR + "${OPENCV_INSTALL_DIR}/include/" + CACHE PATH "opencv include directory." FORCE) +set(OPENCV_LIB_DIR + "${OPENCV_INSTALL_DIR}/lib" + CACHE PATH "opencv lib directory." FORCE) +set(CMAKE_BUILD_RPATH "${CMAKE_BUILD_RPATH}" "${OPENCV_LIB_DIR}") + +if(WIN32) + message(FATAL_ERROR "NOT SUPPORT WINDOWS NOW, OPENCV") +elseif(APPLE) + if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "arm64") + set(OPENCV_URL "https://bj.bcebos.com/paddle2onnx/libs/opencv-osx-arm64-3.4.16.tgz") + else() + set(OPENCV_URL "https://bj.bcebos.com/paddle2onnx/libs/opencv-osx-x86_64-3.4.16.tgz") + endif() +else() + set(OPENCV_URL "https://bj.bcebos.com/paddle2onnx/libs/opencv-linux-x64-3.4.16.tgz") + if(ENABLE_OPENCV_CUDA) + set(OPENCV_URL "https://bj.bcebos.com/paddle2onnx/libs/opencv-linux-x64-gpu-3.4.16.tgz") + endif() +endif() + +include_directories(${OPENCV_INC_DIR} +)# For OPENCV code to include internal headers. 
+
+set(OPENCV_SOURCE_LIB ${OPENCV_SOURCE_DIR}/lib/)
+if(WIN32)
+  message(FATAL_ERROR "OpenCV is not supported on Windows by this script yet.")
+elseif(APPLE)
+  set(OPENCV_CORE_LIB ${OPENCV_INSTALL_DIR}/lib/libopencv_core.dylib)
+  set(OPENCV_HIGHGUI_LIB ${OPENCV_INSTALL_DIR}/lib/libopencv_highgui.dylib)
+  set(OPENCV_IMGPROC_LIB ${OPENCV_INSTALL_DIR}/lib/libopencv_imgproc.dylib)
+  set(OPENCV_IMGCODESC_LIB ${OPENCV_INSTALL_DIR}/lib/libopencv_imgcodecs.dylib)
+else()
+  set(OPENCV_SOURCE_LIB ${OPENCV_SOURCE_DIR}/lib64)
+  set(OPENCV_CORE_LIB ${OPENCV_INSTALL_DIR}/lib/libopencv_core.so)
+  set(OPENCV_HIGHGUI_LIB ${OPENCV_INSTALL_DIR}/lib/libopencv_highgui.so)
+  set(OPENCV_IMGPROC_LIB ${OPENCV_INSTALL_DIR}/lib/libopencv_imgproc.so)
+  set(OPENCV_IMGCODESC_LIB ${OPENCV_INSTALL_DIR}/lib/libopencv_imgcodecs.so)
+  set(OPENCV_CUDAARITHM_LIB ${OPENCV_INSTALL_DIR}/lib/libopencv_cudaarithm.so)
+  set(OPENCV_CUDAIMGPROC_LIB ${OPENCV_INSTALL_DIR}/lib/libopencv_cudaimgproc.so)
+  set(OPENCV_CUDAWARPING_LIB ${OPENCV_INSTALL_DIR}/lib/libopencv_cudawarping.so)
+endif()
+
+if(WIN32)
+  message(FATAL_ERROR "OpenCV is not supported on Windows by this script yet.")
+else()
+  ExternalProject_Add(
+    ${OPENCV_PROJECT}
+    ${EXTERNAL_PROJECT_LOG_ARGS}
+    URL ${OPENCV_URL}
+    PREFIX ${OPENCV_PREFIX_DIR}
+    DOWNLOAD_NO_PROGRESS 1
+    CONFIGURE_COMMAND ""
+    BUILD_COMMAND ""
+    UPDATE_COMMAND ""
+    INSTALL_COMMAND
+      ${CMAKE_COMMAND} -E remove_directory ${OPENCV_INSTALL_DIR} &&
+      ${CMAKE_COMMAND} -E make_directory ${OPENCV_INSTALL_DIR} &&
+      ${CMAKE_COMMAND} -E rename ${OPENCV_SOURCE_LIB} ${OPENCV_INSTALL_DIR}/lib &&
+      ${CMAKE_COMMAND} -E copy_directory ${OPENCV_SOURCE_DIR}/include/
+      ${OPENCV_INC_DIR}
+    BUILD_BYPRODUCTS ${OPENCV_CORE_LIB})
+endif()
+
+add_library(external_opencv_core STATIC IMPORTED GLOBAL)
+set_property(TARGET external_opencv_core PROPERTY IMPORTED_LOCATION ${OPENCV_CORE_LIB})
+add_library(external_opencv_highgui STATIC IMPORTED GLOBAL)
+set_property(TARGET external_opencv_highgui PROPERTY IMPORTED_LOCATION ${OPENCV_HIGHGUI_LIB})
+add_library(external_opencv_imgproc STATIC IMPORTED GLOBAL)
+set_property(TARGET external_opencv_imgproc PROPERTY IMPORTED_LOCATION ${OPENCV_IMGPROC_LIB})
+add_library(external_opencv_imgcodesc STATIC IMPORTED GLOBAL)
+set_property(TARGET external_opencv_imgcodesc PROPERTY IMPORTED_LOCATION ${OPENCV_IMGCODESC_LIB})
+
+add_dependencies(external_opencv_core ${OPENCV_PROJECT})
+add_dependencies(external_opencv_highgui ${OPENCV_PROJECT})
+add_dependencies(external_opencv_imgproc ${OPENCV_PROJECT})
+add_dependencies(external_opencv_imgcodesc ${OPENCV_PROJECT})
+
+list(APPEND DEPEND_LIBS external_opencv_core external_opencv_highgui external_opencv_imgproc external_opencv_imgcodesc)
+
+if(ENABLE_OPENCV_CUDA)
+  add_library(extern_opencv_cudawarping STATIC IMPORTED GLOBAL)
+  set_property(TARGET extern_opencv_cudawarping PROPERTY IMPORTED_LOCATION ${OPENCV_CUDAWARPING_LIB})
+  add_dependencies(extern_opencv_cudawarping ${OPENCV_PROJECT})
+  add_library(extern_opencv_cudaarithm STATIC IMPORTED GLOBAL)
+  set_property(TARGET extern_opencv_cudaarithm PROPERTY IMPORTED_LOCATION ${OPENCV_CUDAARITHM_LIB})
+  add_dependencies(extern_opencv_cudaarithm ${OPENCV_PROJECT})
+  add_library(extern_opencv_cudaimgproc STATIC IMPORTED GLOBAL)
+  set_property(TARGET extern_opencv_cudaimgproc PROPERTY IMPORTED_LOCATION ${OPENCV_CUDAIMGPROC_LIB})
+  add_dependencies(extern_opencv_cudaimgproc ${OPENCV_PROJECT})
+  list(APPEND DEPEND_LIBS extern_opencv_cudawarping extern_opencv_cudaarithm extern_opencv_cudaimgproc)
+endif()
+endif(WIN32)
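+
+# Editor's note (illustrative, assumes a CUDA-enabled prebuilt archive): the
+# extern_opencv_cuda* targets above exist only when the project is configured
+# with the ENABLE_OPENCV_CUDA option, e.g.
+#
+#   cmake -DENABLE_OPENCV_CUDA=ON ..
+#
+# after which DEPEND_LIBS carries both the plain and the CUDA imported
+# libraries for wherever the top-level CMakeLists consumes that list.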
diff --git a/external/paddle2onnx.cmake b/external/paddle2onnx.cmake
new file mode 100644
index 0000000000..b9847830a3
--- /dev/null
+++ b/external/paddle2onnx.cmake
@@ -0,0 +1,80 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+include(ExternalProject)
+
+set(PADDLE2ONNX_PROJECT "extern_paddle2onnx")
+set(PADDLE2ONNX_PREFIX_DIR ${THIRD_PARTY_PATH}/paddle2onnx)
+set(PADDLE2ONNX_SOURCE_DIR
+    ${THIRD_PARTY_PATH}/paddle2onnx/src/${PADDLE2ONNX_PROJECT})
+set(PADDLE2ONNX_INSTALL_DIR ${THIRD_PARTY_PATH}/install/paddle2onnx)
+set(PADDLE2ONNX_INC_DIR
+    "${PADDLE2ONNX_INSTALL_DIR}/include"
+    CACHE PATH "paddle2onnx include directory." FORCE)
+set(PADDLE2ONNX_LIB_DIR
+    "${PADDLE2ONNX_INSTALL_DIR}/lib/"
+    CACHE PATH "paddle2onnx lib directory." FORCE)
+set(CMAKE_BUILD_RPATH "${CMAKE_BUILD_RPATH}"
+    "${PADDLE2ONNX_LIB_DIR}")
+
+include_directories(${PADDLE2ONNX_INC_DIR})
+if(WIN32)
+  set(PADDLE2ONNX_COMPILE_LIB
+      "${PADDLE2ONNX_INSTALL_DIR}/lib/paddle2onnx.lib"
+      CACHE FILEPATH "paddle2onnx compile library." FORCE)
+elseif(APPLE)
+  set(PADDLE2ONNX_COMPILE_LIB
+      "${PADDLE2ONNX_INSTALL_DIR}/lib/libpaddle2onnx.dylib"
+      CACHE FILEPATH "paddle2onnx compile library." FORCE)
+else()
+  set(PADDLE2ONNX_COMPILE_LIB
+      "${PADDLE2ONNX_INSTALL_DIR}/lib/libpaddle2onnx.so"
+      CACHE FILEPATH "paddle2onnx compile library." FORCE)
+endif(WIN32)
+
+set(PADDLE2ONNX_URL_BASE "https://bj.bcebos.com/paddle2onnx/libs/")
+set(PADDLE2ONNX_VERSION "0.9.9")
+if(WIN32)
+  set(PADDLE2ONNX_FILE "paddle2onnx-win-x64-${PADDLE2ONNX_VERSION}.zip")
+elseif(APPLE)
+  if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "arm64")
+    set(PADDLE2ONNX_FILE "paddle2onnx-osx-arm64-${PADDLE2ONNX_VERSION}.tgz")
+  else()
+    set(PADDLE2ONNX_FILE "paddle2onnx-osx-x86_64-${PADDLE2ONNX_VERSION}.tgz")
+  endif()
+else()
+  set(PADDLE2ONNX_FILE "paddle2onnx-linux-x64-${PADDLE2ONNX_VERSION}.tgz")
+endif()
+set(PADDLE2ONNX_URL "${PADDLE2ONNX_URL_BASE}${PADDLE2ONNX_FILE}")
+
+ExternalProject_Add(
+  ${PADDLE2ONNX_PROJECT}
+  ${EXTERNAL_PROJECT_LOG_ARGS}
+  URL ${PADDLE2ONNX_URL}
+  PREFIX ${PADDLE2ONNX_PREFIX_DIR}
+  DOWNLOAD_NO_PROGRESS 1
+  CONFIGURE_COMMAND ""
+  BUILD_COMMAND ""
+  UPDATE_COMMAND ""
+  INSTALL_COMMAND
+    ${CMAKE_COMMAND} -E remove_directory ${PADDLE2ONNX_INSTALL_DIR} &&
+    ${CMAKE_COMMAND} -E make_directory ${PADDLE2ONNX_INSTALL_DIR} &&
+    ${CMAKE_COMMAND} -E rename ${PADDLE2ONNX_SOURCE_DIR}/lib/
+    ${PADDLE2ONNX_LIB_DIR} && ${CMAKE_COMMAND} -E copy_directory
+    ${PADDLE2ONNX_SOURCE_DIR}/include ${PADDLE2ONNX_INC_DIR}
+  BUILD_BYPRODUCTS ${PADDLE2ONNX_COMPILE_LIB})
+
+add_library(external_paddle2onnx STATIC IMPORTED GLOBAL)
+set_property(TARGET external_paddle2onnx PROPERTY IMPORTED_LOCATION
+             ${PADDLE2ONNX_COMPILE_LIB})
+add_dependencies(external_paddle2onnx ${PADDLE2ONNX_PROJECT})
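+
+# Editor's note (hypothetical consumer, not part of this patch): a tool that
+# drives the Paddle2ONNX C++ API would link the imported target defined above,
+# for example:
+#
+#   add_executable(convert_tool convert_tool.cc)   # placeholder target name
+#   target_link_libraries(convert_tool external_paddle2onnx)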
diff --git a/external/summary.cmake b/external/summary.cmake
new file mode 100644
index 0000000000..3c2393eda6
--- /dev/null
+++ b/external/summary.cmake
@@ -0,0 +1,44 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+function(fastdeploy_summary)
+  message(STATUS "")
+  message(STATUS "*************FastDeploy Building Summary**********")
+  message(STATUS "  CMake version            : ${CMAKE_VERSION}")
+  message(STATUS "  CMake command            : ${CMAKE_COMMAND}")
+  message(STATUS "  System                   : ${CMAKE_SYSTEM_NAME}")
+  message(STATUS "  C++ compiler             : ${CMAKE_CXX_COMPILER}")
+  message(STATUS "  C++ compiler version     : ${CMAKE_CXX_COMPILER_VERSION}")
+  message(STATUS "  CXX flags                : ${CMAKE_CXX_FLAGS}")
+  message(STATUS "  Build type               : ${CMAKE_BUILD_TYPE}")
+  get_directory_property(tmp DIRECTORY ${PROJECT_SOURCE_DIR} COMPILE_DEFINITIONS)
+  message(STATUS "  Compile definitions      : ${tmp}")
+  message(STATUS "  CMAKE_PREFIX_PATH        : ${CMAKE_PREFIX_PATH}")
+  message(STATUS "  CMAKE_INSTALL_PREFIX     : ${CMAKE_INSTALL_PREFIX}")
+  message(STATUS "  CMAKE_MODULE_PATH        : ${CMAKE_MODULE_PATH}")
+  message(STATUS "")
+  message(STATUS "  FastDeploy version       : ${FASTDEPLOY_VERSION}")
+  message(STATUS "  Paddle2ONNX version      : ${PADDLE2ONNX_VERSION}")
+  message(STATUS "  ONNXRuntime version      : ${ONNXRUNTIME_VERSION}")
+  message(STATUS "  ENABLE_ORT_BACKEND       : ${ENABLE_ORT_BACKEND}")
+  if(WITH_GPU)
+    message(STATUS "  WITH_GPU               : ${WITH_GPU}")
+    message(STATUS "  ENABLE_TRT_BACKEND     : ${ENABLE_TRT_BACKEND}")
+    message(STATUS "  CUDA_DIRECTORY         : ${CUDA_DIRECTORY}")
+    message(STATUS "  TRT_DIRECTORY          : ${TRT_DIRECTORY}")
+  endif()
+  message(STATUS "  ENABLE_VISION            : ${ENABLE_VISION}")
+  message(STATUS "  ENABLE_DEBUG             : ${ENABLE_DEBUG}")
+  message(STATUS "  ENABLE_VISION_VISUALIZE  : ${ENABLE_VISION_VISUALIZE}")
+endfunction()
diff --git a/external/utils.cmake b/external/utils.cmake
new file mode 100644
index 0000000000..3e6d70b42d
--- /dev/null
+++ b/external/utils.cmake
@@ -0,0 +1,15 @@
+# This function comes from https://blog.csdn.net/yindongjie1221/article/details/90614261
+# It defines __REL_FILE__ for every source file of the given target as that
+# file's path relative to the project root, keeping logs stable across machines.
+function(redefine_file_macro targetname)
+  get_target_property(source_files "${targetname}" SOURCES)
+  foreach(sourcefile ${source_files})
+    get_property(defs SOURCE "${sourcefile}"
+                 PROPERTY COMPILE_DEFINITIONS)
+    get_filename_component(filepath "${sourcefile}" ABSOLUTE)
+    string(REPLACE ${PROJECT_SOURCE_DIR}/ "" relpath ${filepath})
+    list(APPEND defs "__REL_FILE__=\"${relpath}\"")
+    set_property(
+      SOURCE "${sourcefile}"
+      PROPERTY COMPILE_DEFINITIONS ${defs}
+      )
+  endforeach()
+endfunction()
diff --git a/fastdeploy/CMakeLists.txt b/fastdeploy/CMakeLists.txt
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/fastdeploy/LICENSE b/fastdeploy/LICENSE
new file mode 100644
index 0000000000..261eeb9e9f
--- /dev/null
+++ b/fastdeploy/LICENSE
@@ -0,0 +1,201 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+ + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/fastdeploy/ThirdPartyNotices.txt b/fastdeploy/ThirdPartyNotices.txt
new file mode 100644
index 0000000000..5842b9a717
--- /dev/null
+++ b/fastdeploy/ThirdPartyNotices.txt
@@ -0,0 +1,734 @@
+This project depends on some open source projects, listed below.
+
+--------
+1. https://github.com/protocolbuffers/protobuf
+
+Copyright 2008 Google Inc. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+    * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+    * Neither the name of Google Inc. nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+Code generated by the Protocol Buffer compiler is owned by the owner
+of the input file used when generating it. This code is not
+standalone and requires a support library to be linked with it. This
+support library is itself covered by the above license.
+
+--------
+2. https://github.com/onnx/onnx
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +-------- +3. https://github.com/microsoft/onnxruntime + +MIT License + +Copyright (c) Microsoft Corporation + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+--------
+4. https://github.com/pybind/pybind11
+
+Copyright (c) 2016 Wenzel Jakob <wenzel.jakob@epfl.ch>, All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its contributors
+   may be used to endorse or promote products derived from this software
+   without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+Please also refer to the file .github/CONTRIBUTING.md, which clarifies licensing of
+external contributions to this project including patches, pull requests, etc.
+
+--------
+5. https://github.com/onnx/onnx-tensorrt
+
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!) The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright 2021 NVIDIA Corporation
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+--------
+6. https://github.com/opencv/opencv
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+ + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!) The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+--------
+7. https://github.com/jbeder/yaml-cpp
+
+Copyright (c) 2008-2015 Jesse Beder.
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/fastdeploy/__init__.py b/fastdeploy/__init__.py index 73f2b94c50..a9c16e9fea 100644 --- a/fastdeploy/__init__.py +++ b/fastdeploy/__init__.py @@ -11,189 +11,31 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - from __future__ import absolute_import -from six import text_type as _text_type -from .download import download, download_and_decompress - -import argparse - -# Since the source code is not fully open sourced, -# currently we will provide the prebuilt library -# and demo codes -import os - -__version__ = "0.1.0" - - -def parse_arguments(): - parser = argparse.ArgumentParser() - parser.add_argument( - '--model', - type=_text_type, - default=None, - help='Name of model, which can be listed by --list_models') - parser.add_argument( - '--platform', - type=_text_type, - default=None, - help='Define platform, supports Windows/Linux/Android/iOS.') - parser.add_argument( - '--soc', - type=_text_type, - default=None, - help='Define soc for the platform, supports x86/x86-NVIDIA_GPU/ARM/jetson.' 
- ) - parser.add_argument( - '--save_dir', - type=_text_type, - default=".", - help='Path to download and extract deployment SDK.') - parser.add_argument( - '--list_models', - required=False, - action="store_true", - default=False, - help='List all the supported models.') - parser.add_argument( - '--download_sdk', - required=False, - action="store_true", - default=False, - help='List all the supported models.') - - return parser.parse_args() - - -def read_sources(): - user_dir = os.path.expanduser('~') - print("Updating the newest sdk information...") - source_cfgs = "https://bj.bcebos.com/paddlehub/fastdeploy/fastdeploy_newest_sources.cfg.1" - if os.path.exists(os.path.join(user_dir, "fastdeploy_newest_sources.cfg.1")): - os.remove(os.path.join(user_dir, "fastdeploy_newest_sources.cfg.1")) - download(source_cfgs, user_dir) - categories = dict() - res = dict() - with open(os.path.join(user_dir, "fastdeploy_newest_sources.cfg.1")) as f: - for line in f: - if line.strip().startswith("#"): - continue - if line.strip() == "": - continue - category, model, plat, soc, url = line.strip().split('\t') - if category not in categories: - categories[category] = set() - categories[category].add(model) - if model not in res: - res[model] = dict() - if plat not in res[model]: - res[model][plat] = dict() - if soc not in res[model][plat]: - res[model][plat][soc] = dict() - res[model][plat][soc] = url - return categories, res - - -def main(): - args = parse_arguments() - - if not args.list_models and not args.download_sdk: - print( - "Please use flag --list_models to show all the supported models, or use flag --download_sdk to download the specify SDK to deploy you model." - ) - return - - categories, all_sources = read_sources() - all_models = list(all_sources.keys()) - all_models.sort() - - if args.list_models: - print("Currently, FastDeploy supports {} models, list as below,\n".format( - len(all_models))) - - for k, v in categories.items(): - print("\nModel Category: {}".format(k)) - print("_"*100) - models = list(categories[k]) - models.sort() - i = 0 - while i < len(models): - if i == len(models) - 1: - print(models[i].center(30)) - i += 1 - elif i == len(models) - 2: - print(models[i].center(30), models[i+1].center(30)) - i += 2 - else: - print(models[i].center(30), models[i+1].center(30), models[i+2].center(30)) - i += 3 - return - - if not os.path.exists(args.save_dir): - print("The specified save_dir: {} is not exist.".format(args.save_dir)) - return - - if args.model is None or args.model == "": - print( - "Please define --model to choose which kind of model to deploy, use --list_models to show all the supported models." - ) - return - - if args.model not in all_sources: - print( - "{} is not supported, use --list_models to list all the models FastDeploy supported.". - format(args.model)) - return - - if args.platform is None or args.platform == "": - print( - "Please define --platform to choose which platform to deploy, supports windows/linux/android/ios." - ) - return - - if args.platform not in all_sources[args.model]: - print( - "The model:{} only supports platform of {}, {} is not supported now.". - format(args.model, - list(all_sources[args.model].keys()), args.platform)) - return +import logging +from .fastdeploy_main import Frontend, Backend, FDDataType, TensorInfo, RuntimeOption, Device +from .fastdeploy_runtime import * +from . import fastdeploy_main as C +from . 
import vision

-    if args.soc is None or args.soc == "":
-        print(
-            "Please define --soc to choose which hardware to deploy, for model:{} and platform:{}, the available socs are {}.".
-            format(args.model, args.platform,
-                   list(all_sources[args.model][args.platform].keys())))
-        return
+def TensorInfoStr(tensor_info):
+    message = "TensorInfo(name : '{}', dtype : '{}', shape : '{}')".format(
+        tensor_info.name, tensor_info.dtype, tensor_info.shape)
+    return message
-    if args.soc not in all_sources[args.model][args.platform]:
-        print(
-            "The model:{} in platform:{} only supports soc of {}, {} is not supported now.".
-            format(args.model, args.platform,
-                   list(all_sources[args.model][args.platform].keys()),
-                   args.soc))
-        return
+def RuntimeOptionStr(runtime_option):
+    attrs = dir(runtime_option)
+    message = "RuntimeOption(\n"
+    for attr in attrs:
+        if attr.startswith("__"):
+            continue
+        message += "  {} : {}\t\n".format(attr, getattr(runtime_option, attr))
+    # str.strip() returns a new string, so assign the result back;
+    # otherwise the trailing newline is never removed.
+    message = message.strip("\n")
+    message += ")"
+    return message
-    print("\nDownloading SDK:",
-          all_sources[args.model][args.platform][args.soc])
-    save_dir = args.save_dir
-    sdk_name = os.path.split(all_sources[args.model][args.platform][args.soc])[
-        -1].strip()
-    if all_sources[args.model][args.platform][args.soc].count(".zip") > 0:
-        sdk_name = os.path.split(all_sources[args.model][args.platform][
-            args.soc])[-1].strip().split(".zip")[0]
-    new_save_dir = os.path.join(args.save_dir, sdk_name)
-    if not os.path.exists(new_save_dir):
-        os.mkdir(new_save_dir)
-    save_dir = new_save_dir
-    download_and_decompress(
-        all_sources[args.model][args.platform][args.soc],
-        new_save_dir,
-        rename=sdk_name + ".zip")
-    os.remove(os.path.join(new_save_dir, sdk_name + ".zip"))
-    print("Done. All the files of SDK have been extracted in {}.".format(
-        new_save_dir))
+C.TensorInfo.__repr__ = TensorInfoStr
+C.RuntimeOption.__repr__ = RuntimeOptionStr
diff --git a/fastdeploy/backends/backend.h b/fastdeploy/backends/backend.h
new file mode 100644
index 0000000000..240f407340
--- /dev/null
+++ b/fastdeploy/backends/backend.h
@@ -0,0 +1,48 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
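+// Illustrative sketch only (not part of this patch): `MyBackend` is a
+// hypothetical subclass showing how the BaseBackend interface declared
+// below is meant to be implemented by a concrete inference backend.
+//
+//   class MyBackend : public BaseBackend {
+//    public:
+//     int NumInputs() const override { return inputs_.size(); }
+//     int NumOutputs() const override { return outputs_.size(); }
+//     TensorInfo GetInputInfo(int index) override { return inputs_[index]; }
+//     TensorInfo GetOutputInfo(int index) override { return outputs_[index]; }
+//     bool Infer(std::vector<FDTensor>& inputs,
+//                std::vector<FDTensor>* outputs) override;
+//
+//    private:
+//     std::vector<TensorInfo> inputs_, outputs_;
+//   };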
+
+#pragma once
+
+#include <iostream>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "fastdeploy/core/fd_tensor.h"
+
+namespace fastdeploy {
+
+struct TensorInfo {
+  std::string name;
+  std::vector<int64_t> shape;
+  FDDataType dtype;
+};
+
+class BaseBackend {
+ public:
+  bool initialized_ = false;
+
+  BaseBackend() {}
+
+  virtual bool Initialized() const { return initialized_; }
+
+  virtual int NumInputs() const = 0;
+  virtual int NumOutputs() const = 0;
+  virtual TensorInfo GetInputInfo(int index) = 0;
+  virtual TensorInfo GetOutputInfo(int index) = 0;
+  virtual bool Infer(std::vector<FDTensor>& inputs,
+                     std::vector<FDTensor>* outputs) = 0;
+};
+
+} // namespace fastdeploy
diff --git a/fastdeploy/backends/ort/ort_backend.cc b/fastdeploy/backends/ort/ort_backend.cc
new file mode 100644
index 0000000000..7060b758c1
--- /dev/null
+++ b/fastdeploy/backends/ort/ort_backend.cc
@@ -0,0 +1,278 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/backends/ort/ort_backend.h"
+#include "fastdeploy/backends/ort/utils.h"
+#include "fastdeploy/utils/utils.h"
+#include <memory>
+#ifdef ENABLE_PADDLE_FRONTEND
+#include "paddle2onnx/converter.h"
+#endif
+
+namespace fastdeploy {
+
+ONNXTensorElementDataType GetOrtDtype(FDDataType fd_dtype) {
+  if (fd_dtype == FDDataType::FP32) {
+    return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
+  } else if (fd_dtype == FDDataType::FP64) {
+    return ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE;
+  } else if (fd_dtype == FDDataType::INT32) {
+    return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32;
+  } else if (fd_dtype == FDDataType::INT64) {
+    return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64;
+  }
+  FDERROR << "Unrecognized fastdeploy data type:" << FDDataTypeStr(fd_dtype)
+          << "." << std::endl;
+  return ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED;
+}
+
+FDDataType GetFdDtype(ONNXTensorElementDataType ort_dtype) {
+  if (ort_dtype == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT) {
+    return FDDataType::FP32;
+  } else if (ort_dtype == ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE) {
+    return FDDataType::FP64;
+  } else if (ort_dtype == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32) {
+    return FDDataType::INT32;
+  } else if (ort_dtype == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64) {
+    return FDDataType::INT64;
+  }
+  FDERROR << "Unrecognized ort data type:" << ort_dtype << "."
+          << std::endl;
+  return FDDataType::FP32;
+}
+
+void OrtBackend::BuildOption(const OrtBackendOption& option) {
+  option_ = option;
+  if (option.graph_optimization_level >= 0) {
+    session_options_.SetGraphOptimizationLevel(
+        GraphOptimizationLevel(option.graph_optimization_level));
+  }
+  if (option.intra_op_num_threads >= 0) {
+    session_options_.SetIntraOpNumThreads(option.intra_op_num_threads);
+  }
+  if (option.inter_op_num_threads >= 0) {
+    session_options_.SetInterOpNumThreads(option.inter_op_num_threads);
+  }
+  if (option.execution_mode >= 0) {
+    session_options_.SetExecutionMode(ExecutionMode(option.execution_mode));
+  }
+  if (option.use_gpu) {
+    auto all_providers = Ort::GetAvailableProviders();
+    bool support_cuda = false;
+    std::string providers_msg = "";
+    for (size_t i = 0; i < all_providers.size(); ++i) {
+      providers_msg = providers_msg + all_providers[i] + ", ";
+      if (all_providers[i] == "CUDAExecutionProvider") {
+        support_cuda = true;
+      }
+    }
+    if (!support_cuda) {
+      FDLogger() << "[WARN] Compiled fastdeploy with onnxruntime doesn't "
+                    "support GPU, the available providers are "
+                 << providers_msg << "will fallback to CPUExecutionProvider."
+                 << std::endl;
+      option_.use_gpu = false;
+    } else {
+      FDASSERT(option.gpu_id == 0, "Requires gpu_id == 0, but now gpu_id = " +
+                                       std::to_string(option.gpu_id) + ".");
+      OrtCUDAProviderOptions cuda_options;
+      cuda_options.device_id = option.gpu_id;
+      session_options_.AppendExecutionProvider_CUDA(cuda_options);
+    }
+  }
+}
+
+bool OrtBackend::InitFromPaddle(const std::string& model_file,
+                                const std::string& params_file,
+                                const OrtBackendOption& option, bool verbose) {
+  if (initialized_) {
+    FDERROR << "OrtBackend is already initialized, cannot initialize again."
+            << std::endl;
+    return false;
+  }
+#ifdef ENABLE_PADDLE_FRONTEND
+  char* model_content_ptr;
+  int model_content_size = 0;
+  if (!paddle2onnx::Export(model_file.c_str(), params_file.c_str(),
+                           &model_content_ptr, &model_content_size, 11, true,
+                           verbose, true, true, true)) {
+    FDERROR << "Error occurred while exporting PaddlePaddle to ONNX format."
+            << std::endl;
+    return false;
+  }
+  std::string onnx_model_proto(model_content_ptr,
+                               model_content_ptr + model_content_size);
+  delete model_content_ptr;
+  model_content_ptr = nullptr;
+  return InitFromOnnx(onnx_model_proto, option, true);
+#else
+  FDERROR << "Didn't compile with PaddlePaddle frontend, you can try to "
+             "call `InitFromOnnx` instead."
+          << std::endl;
+#endif
+  return false;
+}
+
+bool OrtBackend::InitFromOnnx(const std::string& model_file,
+                              const OrtBackendOption& option,
+                              bool from_memory_buffer) {
+  if (initialized_) {
+    FDERROR << "OrtBackend is already initialized, cannot initialize again."
+            << std::endl;
+    return false;
+  }
+  BuildOption(option);
+  if (from_memory_buffer) {
+    session_ = {env_, model_file.data(), model_file.size(), session_options_};
+  } else {
+#ifdef _WIN32
+    session_ = {env_,
+                std::wstring(model_file.begin(), model_file.end()).c_str(),
+                session_options_};
+#else
+    session_ = {env_, model_file.c_str(), session_options_};
+#endif
+  }
+  binding_ = std::make_shared<Ort::IoBinding>(session_);
+
+  Ort::MemoryInfo memory_info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault);
+  Ort::Allocator allocator(session_, memory_info);
+  size_t n_inputs = session_.GetInputCount();
+  for (size_t i = 0; i < n_inputs; ++i) {
+    auto input_name = session_.GetInputName(i, allocator);
+    auto type_info = session_.GetInputTypeInfo(i);
+    std::vector<int64_t> shape =
+        type_info.GetTensorTypeAndShapeInfo().GetShape();
+    ONNXTensorElementDataType data_type =
+        type_info.GetTensorTypeAndShapeInfo().GetElementType();
+    inputs_desc_.emplace_back(OrtValueInfo{input_name, shape, data_type});
+    allocator.Free(input_name);
+  }
+
+  size_t n_outputs = session_.GetOutputCount();
+  for (size_t i = 0; i < n_outputs; ++i) {
+    auto output_name = session_.GetOutputName(i, allocator);
+    auto type_info = session_.GetOutputTypeInfo(i);
+    std::vector<int64_t> shape =
+        type_info.GetTensorTypeAndShapeInfo().GetShape();
+    ONNXTensorElementDataType data_type =
+        type_info.GetTensorTypeAndShapeInfo().GetElementType();
+    outputs_desc_.emplace_back(OrtValueInfo{output_name, shape, data_type});
+
+    Ort::MemoryInfo out_memory_info("Cpu", OrtDeviceAllocator, 0,
+                                    OrtMemTypeDefault);
+    binding_->BindOutput(output_name, out_memory_info);
+
+    allocator.Free(output_name);
+  }
+  initialized_ = true;
+  return true;
+}
+
+void OrtBackend::CopyToCpu(const Ort::Value& value, FDTensor* tensor) {
+  const auto info = value.GetTensorTypeAndShapeInfo();
+  const auto data_type = info.GetElementType();
+  size_t numel = info.GetElementCount();
+  tensor->shape = info.GetShape();
+
+  if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT) {
+    tensor->data.resize(numel * sizeof(float));
+    memcpy(static_cast<void*>(tensor->Data()), value.GetTensorData<float>(),
+           numel * sizeof(float));
+    tensor->dtype = FDDataType::FP32;
+  } else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32) {
+    tensor->data.resize(numel * sizeof(int32_t));
+    memcpy(static_cast<void*>(tensor->Data()), value.GetTensorData<int32_t>(),
+           numel * sizeof(int32_t));
+    tensor->dtype = FDDataType::INT32;
+  } else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64) {
+    tensor->data.resize(numel * sizeof(int64_t));
+    memcpy(static_cast<void*>(tensor->Data()), value.GetTensorData<int64_t>(),
+           numel * sizeof(int64_t));
+    tensor->dtype = FDDataType::INT64;
+  } else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE) {
+    tensor->data.resize(numel * sizeof(double));
+    memcpy(static_cast<void*>(tensor->Data()), value.GetTensorData<double>(),
+           numel * sizeof(double));
+    tensor->dtype = FDDataType::FP64;
+  } else {
+    FDASSERT(false, "Unrecognized data type of " + std::to_string(data_type) +
+                        " while calling OrtBackend::CopyToCpu().");
+  }
+}
+
+bool OrtBackend::Infer(std::vector<FDTensor>& inputs,
+                       std::vector<FDTensor>* outputs) {
+  if (inputs.size() != inputs_desc_.size()) {
+    FDERROR << "[OrtBackend] Size of the inputs(" << inputs.size()
+            << ") should match the number of inputs of this model("
+            << inputs_desc_.size() << ")."
+            << std::endl;
+    return false;
+  }
+
+  // from FDTensor to Ort Inputs
+  for (size_t i = 0; i < inputs.size(); ++i) {
+    auto ort_value = CreateOrtValue(inputs[i], option_.use_gpu);
+    binding_->BindInput(inputs[i].name.c_str(), ort_value);
+  }
+
+  for (size_t i = 0; i < outputs_desc_.size(); ++i) {
+    Ort::MemoryInfo memory_info("Cpu", OrtDeviceAllocator, 0,
+                                OrtMemTypeDefault);
+    binding_->BindOutput(outputs_desc_[i].name.c_str(), memory_info);
+  }
+
+  // Inference with inputs
+  try {
+    session_.Run({}, *(binding_.get()));
+  } catch (const std::exception& e) {
+    FDERROR << "Failed to Infer: " << e.what() << std::endl;
+    return false;
+  }
+
+  // Copy result after inference
+  std::vector<Ort::Value> ort_outputs = binding_->GetOutputValues();
+  outputs->resize(ort_outputs.size());
+  for (size_t i = 0; i < ort_outputs.size(); ++i) {
+    (*outputs)[i].name = outputs_desc_[i].name;
+    CopyToCpu(ort_outputs[i], &((*outputs)[i]));
+  }
+  return true;
+}
+
+TensorInfo OrtBackend::GetInputInfo(int index) {
+  FDASSERT(index < NumInputs(),
+           "The index:" + std::to_string(index) +
+               " should be less than the number of inputs:" +
+               std::to_string(NumInputs()) + ".");
+  TensorInfo info;
+  info.name = inputs_desc_[index].name;
+  info.shape.assign(inputs_desc_[index].shape.begin(),
+                    inputs_desc_[index].shape.end());
+  info.dtype = GetFdDtype(inputs_desc_[index].dtype);
+  return info;
+}
+
+TensorInfo OrtBackend::GetOutputInfo(int index) {
+  FDASSERT(index < NumOutputs(),
+           "The index:" + std::to_string(index) +
+               " should be less than the number of outputs:" +
+               std::to_string(NumOutputs()) + ".");
+  TensorInfo info;
+  info.name = outputs_desc_[index].name;
+  info.shape.assign(outputs_desc_[index].shape.begin(),
+                    outputs_desc_[index].shape.end());
+  info.dtype = GetFdDtype(outputs_desc_[index].dtype);
+  return info;
+}
+
+} // namespace fastdeploy
diff --git a/fastdeploy/backends/ort/ort_backend.h b/fastdeploy/backends/ort/ort_backend.h
new file mode 100644
index 0000000000..3200c29352
--- /dev/null
+++ b/fastdeploy/backends/ort/ort_backend.h
@@ -0,0 +1,84 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
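+// Hedged usage sketch (illustrative; the model path and tensor preparation
+// are assumptions, only the OrtBackend/OrtBackendOption API declared below
+// is taken from this file):
+//
+//   fastdeploy::OrtBackendOption option;
+//   option.use_gpu = false;            // stay on CPUExecutionProvider
+//   option.intra_op_num_threads = 4;
+//
+//   fastdeploy::OrtBackend backend;
+//   if (backend.InitFromOnnx("model.onnx", option)) {
+//     std::vector<fastdeploy::FDTensor> inputs(backend.NumInputs());
+//     std::vector<fastdeploy::FDTensor> outputs;
+//     // ... fill each input according to backend.GetInputInfo(i) ...
+//     backend.Infer(inputs, &outputs);
+//   }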
+
+#pragma once
+
+#include <iostream>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "fastdeploy/backends/backend.h"
+#include "onnxruntime_cxx_api.h" // NOLINT
+
+namespace fastdeploy {
+
+struct OrtValueInfo {
+  std::string name;
+  std::vector<int64_t> shape;
+  ONNXTensorElementDataType dtype;
+};
+
+struct OrtBackendOption {
+  // -1 means default
+  // 0: ORT_DISABLE_ALL
+  // 1: ORT_ENABLE_BASIC
+  // 2: ORT_ENABLE_EXTENDED
+  // 99: ORT_ENABLE_ALL (enable some custom optimizations, e.g. bert)
+  int graph_optimization_level = -1;
+  int intra_op_num_threads = -1;
+  int inter_op_num_threads = -1;
+  // 0: ORT_SEQUENTIAL
+  // 1: ORT_PARALLEL
+  int execution_mode = -1;
+  bool use_gpu = false;
+  int gpu_id = 0;
+};
+
+class OrtBackend : public BaseBackend {
+ public:
+  OrtBackend() {}
+  void BuildOption(const OrtBackendOption& option);
+
+  bool InitFromPaddle(const std::string& model_file,
+                      const std::string& params_file,
+                      const OrtBackendOption& option = OrtBackendOption(),
+                      bool verbose = false);
+
+  bool InitFromOnnx(const std::string& model_file,
+                    const OrtBackendOption& option = OrtBackendOption(),
+                    bool from_memory_buffer = false);
+
+  bool Infer(std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs);
+
+  int NumInputs() const { return inputs_desc_.size(); }
+
+  int NumOutputs() const { return outputs_desc_.size(); }
+
+  TensorInfo GetInputInfo(int index);
+  TensorInfo GetOutputInfo(int index);
+
+ private:
+  Ort::Env env_;
+  Ort::Session session_{nullptr};
+  Ort::SessionOptions session_options_;
+  std::shared_ptr<Ort::IoBinding> binding_;
+  std::vector<OrtValueInfo> inputs_desc_;
+  std::vector<OrtValueInfo> outputs_desc_;
+
+  OrtBackendOption option_;
+
+  void CopyToCpu(const Ort::Value& value, FDTensor* tensor);
+};
+} // namespace fastdeploy
diff --git a/fastdeploy/backends/ort/utils.cc b/fastdeploy/backends/ort/utils.cc
new file mode 100644
index 0000000000..bbef1f3786
--- /dev/null
+++ b/fastdeploy/backends/ort/utils.cc
@@ -0,0 +1,67 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/backends/ort/utils.h"
+#include "fastdeploy/utils/utils.h"
+
+namespace fastdeploy {
+
+ONNXTensorElementDataType GetOrtDtype(const FDDataType& fd_dtype) {
+  if (fd_dtype == FDDataType::FP32) {
+    return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
+  } else if (fd_dtype == FDDataType::FP64) {
+    return ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE;
+  } else if (fd_dtype == FDDataType::INT32) {
+    return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32;
+  } else if (fd_dtype == FDDataType::INT64) {
+    return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64;
+  }
+  FDERROR << "Unrecognized fastdeploy data type:" << FDDataTypeStr(fd_dtype)
+          << "."
+          << std::endl;
+  return ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED;
+}
+
+FDDataType GetFdDtype(const ONNXTensorElementDataType& ort_dtype) {
+  if (ort_dtype == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT) {
+    return FDDataType::FP32;
+  } else if (ort_dtype == ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE) {
+    return FDDataType::FP64;
+  } else if (ort_dtype == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32) {
+    return FDDataType::INT32;
+  } else if (ort_dtype == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64) {
+    return FDDataType::INT64;
+  }
+  FDERROR << "Unrecognized ort data type:" << ort_dtype << "." << std::endl;
+  return FDDataType::FP32;
+}
+
+Ort::Value CreateOrtValue(FDTensor& tensor, bool is_backend_cuda) {
+  FDASSERT(tensor.device == Device::GPU || tensor.device == Device::CPU,
+           "Only support tensor which device is CPU or GPU for OrtBackend.");
+  if (tensor.device == Device::GPU && is_backend_cuda) {
+    Ort::MemoryInfo memory_info("Cuda", OrtDeviceAllocator, 0,
+                                OrtMemTypeDefault);
+    auto ort_value = Ort::Value::CreateTensor(
+        memory_info, tensor.MutableData(), tensor.Nbytes(),
+        tensor.shape.data(), tensor.shape.size(), GetOrtDtype(tensor.dtype));
+    return ort_value;
+  }
+  Ort::MemoryInfo memory_info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault);
+  auto ort_value = Ort::Value::CreateTensor(
+      memory_info, tensor.Data(), tensor.Nbytes(), tensor.shape.data(),
+      tensor.shape.size(), GetOrtDtype(tensor.dtype));
+  return ort_value;
+}
+
+} // namespace fastdeploy
diff --git a/fastdeploy/backends/ort/utils.h b/fastdeploy/backends/ort/utils.h
new file mode 100644
index 0000000000..b1b29e5ab1
--- /dev/null
+++ b/fastdeploy/backends/ort/utils.h
@@ -0,0 +1,39 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <iostream>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "fastdeploy/backends/backend.h"
+#include "onnxruntime_cxx_api.h" // NOLINT
+
+namespace fastdeploy {
+
+// Convert FDDataType to OrtDataType
+ONNXTensorElementDataType GetOrtDtype(const FDDataType& fd_dtype);
+
+// Convert OrtDataType to FDDataType
+FDDataType GetFdDtype(const ONNXTensorElementDataType& ort_dtype);
+
+// Create Ort::Value
+// is_backend_cuda specifies whether onnxruntime uses CUDAExecutionProvider.
+// When is_backend_cuda = true and tensor.device = Device::GPU,
+// the CUDA data held by the tensor is shared directly with the OrtValue.
+Ort::Value CreateOrtValue(FDTensor& tensor, bool is_backend_cuda = false);
+
+} // namespace fastdeploy
diff --git a/fastdeploy/backends/tensorrt/common/BatchStream.h b/fastdeploy/backends/tensorrt/common/BatchStream.h
new file mode 100644
index 0000000000..2484ccc689
--- /dev/null
+++ b/fastdeploy/backends/tensorrt/common/BatchStream.h
@@ -0,0 +1,342 @@
+/*
+ * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef BATCH_STREAM_H
+#define BATCH_STREAM_H
+
+#include "NvInfer.h"
+#include "common.h"
+#include <algorithm>
+#include <stdio.h>
+#include <vector>
+
+class IBatchStream {
+ public:
+  virtual void reset(int firstBatch) = 0;
+  virtual bool next() = 0;
+  virtual void skip(int skipCount) = 0;
+  virtual float* getBatch() = 0;
+  virtual float* getLabels() = 0;
+  virtual int getBatchesRead() const = 0;
+  virtual int getBatchSize() const = 0;
+  virtual nvinfer1::Dims getDims() const = 0;
+};
+
+class MNISTBatchStream : public IBatchStream {
+ public:
+  MNISTBatchStream(int batchSize, int maxBatches, const std::string& dataFile,
+                   const std::string& labelsFile,
+                   const std::vector<std::string>& directories)
+      : mBatchSize{batchSize}, mMaxBatches{maxBatches}, mDims{3, {1, 28, 28}}
+  //!< We already know the dimensions of MNIST images.
+  {
+    readDataFile(locateFile(dataFile, directories));
+    readLabelsFile(locateFile(labelsFile, directories));
+  }
+
+  void reset(int firstBatch) override { mBatchCount = firstBatch; }
+
+  bool next() override {
+    if (mBatchCount >= mMaxBatches) {
+      return false;
+    }
+    ++mBatchCount;
+    return true;
+  }
+
+  void skip(int skipCount) override { mBatchCount += skipCount; }
+
+  float* getBatch() override {
+    return mData.data() +
+           (mBatchCount * mBatchSize * samplesCommon::volume(mDims));
+  }
+
+  float* getLabels() override {
+    return mLabels.data() + (mBatchCount * mBatchSize);
+  }
+
+  int getBatchesRead() const override { return mBatchCount; }
+
+  int getBatchSize() const override { return mBatchSize; }
+
+  nvinfer1::Dims getDims() const override {
+    return Dims{4, {mBatchSize, mDims.d[0], mDims.d[1], mDims.d[2]}};
+  }
+
+ private:
+  void readDataFile(const std::string& dataFilePath) {
+    std::ifstream file{dataFilePath.c_str(), std::ios::binary};
+
+    int magicNumber, numImages, imageH, imageW;
+    file.read(reinterpret_cast<char*>(&magicNumber), sizeof(magicNumber));
+    // All values in the MNIST files are big endian.
+    magicNumber = samplesCommon::swapEndianness(magicNumber);
+    ASSERT(magicNumber == 2051 &&
+           "Magic Number does not match the expected value for an MNIST image "
+           "set");
+
+    // Read number of images and dimensions
+    file.read(reinterpret_cast<char*>(&numImages), sizeof(numImages));
+    file.read(reinterpret_cast<char*>(&imageH), sizeof(imageH));
+    file.read(reinterpret_cast<char*>(&imageW), sizeof(imageW));
+
+    numImages = samplesCommon::swapEndianness(numImages);
+    imageH = samplesCommon::swapEndianness(imageH);
+    imageW = samplesCommon::swapEndianness(imageW);
+
+    // The MNIST data is made up of unsigned bytes, so we need to cast to float
+    // and normalize.
+    int numElements = numImages * imageH * imageW;
+    std::vector<uint8_t> rawData(numElements);
+    file.read(reinterpret_cast<char*>(rawData.data()),
+              numElements * sizeof(uint8_t));
+    mData.resize(numElements);
+    std::transform(rawData.begin(), rawData.end(), mData.begin(),
+                   [](uint8_t val) { return static_cast<float>(val) / 255.f; });
+  }
+
+  void readLabelsFile(const std::string& labelsFilePath) {
+    std::ifstream file{labelsFilePath.c_str(), std::ios::binary};
+    int magicNumber, numImages;
+    file.read(reinterpret_cast<char*>(&magicNumber), sizeof(magicNumber));
+    // All values in the MNIST files are big endian.
+    magicNumber = samplesCommon::swapEndianness(magicNumber);
+    ASSERT(magicNumber == 2049 &&
+           "Magic Number does not match the expected value for an MNIST labels "
+           "file");
+
+    file.read(reinterpret_cast<char*>(&numImages), sizeof(numImages));
+    numImages = samplesCommon::swapEndianness(numImages);
+
+    std::vector<uint8_t> rawLabels(numImages);
+    file.read(reinterpret_cast<char*>(rawLabels.data()),
+              numImages * sizeof(uint8_t));
+    mLabels.resize(numImages);
+    std::transform(rawLabels.begin(), rawLabels.end(), mLabels.begin(),
+                   [](uint8_t val) { return static_cast<float>(val); });
+  }
+
+  int mBatchSize{0};
+  int mBatchCount{
+      0}; //!< The batch that will be read on the next invocation of next()
+  int mMaxBatches{0};
+  Dims mDims{};
+  std::vector<float> mData{};
+  std::vector<float> mLabels{};
+};
+
+class BatchStream : public IBatchStream {
+ public:
+  BatchStream(int batchSize, int maxBatches, std::string prefix,
+              std::string suffix, std::vector<std::string> directories)
+      : mBatchSize(batchSize), mMaxBatches(maxBatches), mPrefix(prefix),
+        mSuffix(suffix), mDataDir(directories) {
+    FILE* file = fopen(
+        locateFile(mPrefix + std::string("0") + mSuffix, mDataDir).c_str(),
+        "rb");
+    ASSERT(file != nullptr);
+    int d[4];
+    size_t readSize = fread(d, sizeof(int), 4, file);
+    ASSERT(readSize == 4);
+    mDims.nbDims = 4;  // The number of dimensions.
+    mDims.d[0] = d[0]; // Batch Size
+    mDims.d[1] = d[1]; // Channels
+    mDims.d[2] = d[2]; // Height
+    mDims.d[3] = d[3]; // Width
+    ASSERT(mDims.d[0] > 0 && mDims.d[1] > 0 && mDims.d[2] > 0 &&
+           mDims.d[3] > 0);
+    fclose(file);
+
+    mImageSize = mDims.d[1] * mDims.d[2] * mDims.d[3];
+    mBatch.resize(mBatchSize * mImageSize, 0);
+    mLabels.resize(mBatchSize, 0);
+    mFileBatch.resize(mDims.d[0] * mImageSize, 0);
+    mFileLabels.resize(mDims.d[0], 0);
+    reset(0);
+  }
+
+  BatchStream(int batchSize, int maxBatches, std::string prefix,
+              std::vector<std::string> directories)
+      : BatchStream(batchSize, maxBatches, prefix, ".batch", directories) {}
+
+  BatchStream(int batchSize, int maxBatches, nvinfer1::Dims dims,
+              std::string listFile, std::vector<std::string> directories)
+      : mBatchSize(batchSize), mMaxBatches(maxBatches), mDims(dims),
+        mListFile(listFile), mDataDir(directories) {
+    mImageSize = mDims.d[1] * mDims.d[2] * mDims.d[3];
+    mBatch.resize(mBatchSize * mImageSize, 0);
+    mLabels.resize(mBatchSize, 0);
+    mFileBatch.resize(mDims.d[0] * mImageSize, 0);
+    mFileLabels.resize(mDims.d[0], 0);
+    reset(0);
+  }
+
+  // Resets data members
+  void reset(int firstBatch) override {
+    mBatchCount = 0;
+    mFileCount = 0;
+    mFileBatchPos = mDims.d[0];
+    skip(firstBatch);
+  }
+
+  // Advance to next batch and return true, or return false if there is no
+  // batch left.
+  bool next() override {
+    if (mBatchCount == mMaxBatches) {
+      return false;
+    }
+
+    for (int csize = 1, batchPos = 0; batchPos < mBatchSize;
+         batchPos += csize, mFileBatchPos += csize) {
+      ASSERT(mFileBatchPos > 0 && mFileBatchPos <= mDims.d[0]);
+      if (mFileBatchPos == mDims.d[0] && !update()) {
+        return false;
+      }
+
+      // copy the smaller of: elements left to fulfill the request, or elements
+      // left in the file buffer.
+      csize = std::min(mBatchSize - batchPos, mDims.d[0] - mFileBatchPos);
+      std::copy_n(getFileBatch() + mFileBatchPos * mImageSize,
+                  csize * mImageSize, getBatch() + batchPos * mImageSize);
+      std::copy_n(getFileLabels() + mFileBatchPos, csize,
+                  getLabels() + batchPos);
+    }
+    mBatchCount++;
+    return true;
+  }
+
+  // Skips the batches
+  void skip(int skipCount) override {
+    if (mBatchSize >= mDims.d[0] && mBatchSize % mDims.d[0] == 0 &&
+        mFileBatchPos == mDims.d[0]) {
+      mFileCount += skipCount * mBatchSize / mDims.d[0];
+      return;
+    }
+
+    int x = mBatchCount;
+    for (int i = 0; i < skipCount; i++) {
+      next();
+    }
+    mBatchCount = x;
+  }
+
+  float* getBatch() override { return mBatch.data(); }
+
+  float* getLabels() override { return mLabels.data(); }
+
+  int getBatchesRead() const override { return mBatchCount; }
+
+  int getBatchSize() const override { return mBatchSize; }
+
+  nvinfer1::Dims getDims() const override { return mDims; }
+
+ private:
+  float* getFileBatch() { return mFileBatch.data(); }
+
+  float* getFileLabels() { return mFileLabels.data(); }
+
+  bool update() {
+    if (mListFile.empty()) {
+      std::string inputFileName = locateFile(
+          mPrefix + std::to_string(mFileCount++) + mSuffix, mDataDir);
+      FILE* file = fopen(inputFileName.c_str(), "rb");
+      if (!file) {
+        return false;
+      }
+
+      int d[4];
+      size_t readSize = fread(d, sizeof(int), 4, file);
+      ASSERT(readSize == 4);
+      ASSERT(mDims.d[0] == d[0] && mDims.d[1] == d[1] && mDims.d[2] == d[2] &&
+             mDims.d[3] == d[3]);
+      size_t readInputCount =
+          fread(getFileBatch(), sizeof(float), mDims.d[0] * mImageSize, file);
+      ASSERT(readInputCount == size_t(mDims.d[0] * mImageSize));
+      size_t readLabelCount =
+          fread(getFileLabels(), sizeof(float), mDims.d[0], file);
+      ASSERT(readLabelCount == 0 || readLabelCount == size_t(mDims.d[0]));
+
+      fclose(file);
+    } else {
+      std::vector<std::string> fNames;
+      std::ifstream file(locateFile(mListFile, mDataDir), std::ios::binary);
+      if (!file) {
+        return false;
+      }
+
+      sample::gLogInfo << "Batch #" << mFileCount << std::endl;
+      file.seekg(((mBatchCount * mBatchSize)) * 7);
+
+      for (int i = 1; i <= mBatchSize; i++) {
+        std::string sName;
+        std::getline(file, sName);
+        sName = sName + ".ppm";
+        sample::gLogInfo << "Calibrating with file " << sName << std::endl;
+        fNames.emplace_back(sName);
+      }
+
+      mFileCount++;
+
+      const int imageC = 3;
+      const int imageH = 300;
+      const int imageW = 300;
+      std::vector<samplesCommon::PPM<imageC, imageH, imageW>> ppms(
+          fNames.size());
+      for (uint32_t i = 0; i < fNames.size(); ++i) {
+        readPPMFile(locateFile(fNames[i], mDataDir), ppms[i]);
+      }
+
+      std::vector<float> data(samplesCommon::volume(mDims));
+      const float scale = 2.0 / 255.0;
+      const float bias = 1.0;
+      long int volChl = mDims.d[2] * mDims.d[3];
+
+      // Normalize input data
+      for (int i = 0, volImg = mDims.d[1] * mDims.d[2] * mDims.d[3];
+           i < mBatchSize; ++i) {
+        for (int c = 0; c < mDims.d[1]; ++c) {
+          for (int j = 0; j < volChl; ++j) {
+            data[i * volImg + c * volChl + j] =
+                scale * float(ppms[i].buffer[j * mDims.d[1] + c]) - bias;
+          }
+        }
+      }
+
+      std::copy_n(data.data(), mDims.d[0] * mImageSize, getFileBatch());
+    }
+
+    mFileBatchPos = 0;
+    return true;
+  }
+
+  int mBatchSize{0};
+  int mMaxBatches{0};
+  int mBatchCount{0};
+  int mFileCount{0};
+  int mFileBatchPos{0};
+  int mImageSize{0};
+  std::vector<float> mBatch;      //!< Data for the batch
+  std::vector<float> mLabels;     //!< Labels for the batch
+  std::vector<float> mFileBatch;  //!< List of image files
+  std::vector<float> mFileLabels; //!< List of label files
+  std::string mPrefix;            //!< Batch file name prefix
+  std::string mSuffix;            //!< Batch file name suffix
+  nvinfer1::Dims mDims;           //!< Input dimensions
+  std::string mListFile;          //!< File name of the list of image names
+  std::vector<std::string>
+      mDataDir; //!< Directories where the files can be found
+};
+
+#endif
diff --git a/fastdeploy/backends/tensorrt/common/CPPLINT.cfg b/fastdeploy/backends/tensorrt/common/CPPLINT.cfg
new file mode 100644
index 0000000000..51ff339c18
--- /dev/null
+++ b/fastdeploy/backends/tensorrt/common/CPPLINT.cfg
@@ -0,0 +1 @@
+exclude_files=.*
diff --git a/fastdeploy/backends/tensorrt/common/EntropyCalibrator.h b/fastdeploy/backends/tensorrt/common/EntropyCalibrator.h
new file mode 100644
index 0000000000..40eb8f13e6
--- /dev/null
+++ b/fastdeploy/backends/tensorrt/common/EntropyCalibrator.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ENTROPY_CALIBRATOR_H
+#define ENTROPY_CALIBRATOR_H
+
+#include "BatchStream.h"
+#include "NvInfer.h"
+
+//! \class EntropyCalibratorImpl
+//!
+//! \brief Implements common functionality for Entropy calibrators.
+//!
+template <typename TBatchStream> class EntropyCalibratorImpl {
+ public:
+  EntropyCalibratorImpl(TBatchStream stream, int firstBatch,
+                        std::string networkName, const char* inputBlobName,
+                        bool readCache = true)
+      : mStream{stream},
+        mCalibrationTableName("CalibrationTable" + networkName),
+        mInputBlobName(inputBlobName), mReadCache(readCache) {
+    nvinfer1::Dims dims = mStream.getDims();
+    mInputCount = samplesCommon::volume(dims);
+    CHECK(cudaMalloc(&mDeviceInput, mInputCount * sizeof(float)));
+    mStream.reset(firstBatch);
+  }
+
+  virtual ~EntropyCalibratorImpl() { CHECK(cudaFree(mDeviceInput)); }
+
+  int getBatchSize() const noexcept { return mStream.getBatchSize(); }
+
+  bool getBatch(void* bindings[], const char* names[],
+                int nbBindings) noexcept {
+    if (!mStream.next()) {
+      return false;
+    }
+    CHECK(cudaMemcpy(mDeviceInput, mStream.getBatch(),
+                     mInputCount * sizeof(float), cudaMemcpyHostToDevice));
+    ASSERT(!strcmp(names[0], mInputBlobName));
+    bindings[0] = mDeviceInput;
+    return true;
+  }
+
+  const void* readCalibrationCache(size_t& length) noexcept {
+    mCalibrationCache.clear();
+    std::ifstream input(mCalibrationTableName, std::ios::binary);
+    input >> std::noskipws;
+    if (mReadCache && input.good()) {
+      std::copy(std::istream_iterator<char>(input),
+                std::istream_iterator<char>(),
+                std::back_inserter(mCalibrationCache));
+    }
+    length = mCalibrationCache.size();
+    return length ? mCalibrationCache.data() : nullptr;
+  }
+
+  void writeCalibrationCache(const void* cache, size_t length) noexcept {
+    std::ofstream output(mCalibrationTableName, std::ios::binary);
+    output.write(reinterpret_cast<const char*>(cache), length);
+  }
+
+ private:
+  TBatchStream mStream;
+  size_t mInputCount;
+  std::string mCalibrationTableName;
+  const char* mInputBlobName;
+  bool mReadCache{true};
+  void* mDeviceInput{nullptr};
+  std::vector<char> mCalibrationCache;
+};
+
+//! \class Int8EntropyCalibrator2
+//!
+//! \brief Implements Entropy calibrator 2.
+//!        CalibrationAlgoType is kENTROPY_CALIBRATION_2.
+//!
+template <typename TBatchStream>
+class Int8EntropyCalibrator2 : public IInt8EntropyCalibrator2 {
+ public:
+  Int8EntropyCalibrator2(TBatchStream stream, int firstBatch,
+                         const char* networkName, const char* inputBlobName,
+                         bool readCache = true)
+      : mImpl(stream, firstBatch, networkName, inputBlobName, readCache) {}
+
+  int getBatchSize() const noexcept override { return mImpl.getBatchSize(); }
+
+  bool getBatch(void* bindings[], const char* names[],
+                int nbBindings) noexcept override {
+    return mImpl.getBatch(bindings, names, nbBindings);
+  }
+
+  const void* readCalibrationCache(size_t& length) noexcept override {
+    return mImpl.readCalibrationCache(length);
+  }
+
+  void writeCalibrationCache(const void* cache,
+                             size_t length) noexcept override {
+    mImpl.writeCalibrationCache(cache, length);
+  }
+
+ private:
+  EntropyCalibratorImpl<TBatchStream> mImpl;
+};
+
+#endif // ENTROPY_CALIBRATOR_H
diff --git a/fastdeploy/backends/tensorrt/common/ErrorRecorder.h b/fastdeploy/backends/tensorrt/common/ErrorRecorder.h
new file mode 100644
index 0000000000..e13b55bd93
--- /dev/null
+++ b/fastdeploy/backends/tensorrt/common/ErrorRecorder.h
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ERROR_RECORDER_H
+#define ERROR_RECORDER_H
+#include "NvInferRuntimeCommon.h"
+#include "logger.h"
+#include <atomic>
+#include <cstdint>
+#include <mutex>
+#include <utility>
+#include <vector>
+
+using nvinfer1::ErrorCode;
+using nvinfer1::IErrorRecorder;
+
+//!
+//! A simple implementation of the IErrorRecorder interface for
+//! use by samples. This interface also can be used as a reference
+//! implementation.
+//! The sample Error recorder is based on a vector that pairs the error
+//! code and the error string into a single element. It also uses
+//! standard mutex's and atomics in order to make sure that the code
+//! works in a multi-threaded environment.
+//!
+class SampleErrorRecorder : public IErrorRecorder {
+  using errorPair = std::pair<ErrorCode, std::string>;
+  using errorStack = std::vector<errorPair>;
+
+ public:
+  SampleErrorRecorder() = default;
+
+  virtual ~SampleErrorRecorder() noexcept {}
+  int32_t getNbErrors() const noexcept final { return mErrorStack.size(); }
+  ErrorCode getErrorCode(int32_t errorIdx) const noexcept final {
+    return invalidIndexCheck(errorIdx) ? ErrorCode::kINVALID_ARGUMENT
+                                       : (*this)[errorIdx].first;
+  };
+  IErrorRecorder::ErrorDesc
+  getErrorDesc(int32_t errorIdx) const noexcept final {
+    return invalidIndexCheck(errorIdx) ? "errorIdx out of range."
+                                       : (*this)[errorIdx].second.c_str();
+  }
+  // This class can never overflow since we have dynamic resize via std::vector
+  // usage.
+  bool hasOverflowed() const noexcept final { return false; }
+
+  // Empty the errorStack.
+  void clear() noexcept final {
+    try {
+      // grab a lock so that there is no addition while clearing.
+      std::lock_guard<std::mutex> guard(mStackLock);
+      mErrorStack.clear();
+    } catch (const std::exception& e) {
+      sample::gLogFatal << "Internal Error: " << e.what() << std::endl;
+    }
+  };
+
+  //! Simple helper function that reports whether the error stack is empty.
+  bool empty() const noexcept { return mErrorStack.empty(); }
+
+  bool reportError(ErrorCode val,
+                   IErrorRecorder::ErrorDesc desc) noexcept final {
+    try {
+      std::lock_guard<std::mutex> guard(mStackLock);
+      sample::gLogError << "Error[" << static_cast<int32_t>(val)
+                        << "]: " << desc << std::endl;
+      mErrorStack.push_back(errorPair(val, desc));
+    } catch (const std::exception& e) {
+      sample::gLogFatal << "Internal Error: " << e.what() << std::endl;
+    }
+    // All errors are considered fatal.
+    return true;
+  }
+
+  // Atomically increment or decrement the ref counter.
+  IErrorRecorder::RefCount incRefCount() noexcept final { return ++mRefCount; }
+  IErrorRecorder::RefCount decRefCount() noexcept final { return --mRefCount; }
+
+ private:
+  // Simple helper functions.
+  const errorPair& operator[](size_t index) const noexcept {
+    return mErrorStack[index];
+  }
+
+  bool invalidIndexCheck(int32_t index) const noexcept {
+    // By converting signed to unsigned, we only need a single check since
+    // negative numbers turn into large positive greater than the size.
+    size_t sIndex = index;
+    return sIndex >= mErrorStack.size();
+  }
+  // Mutex to hold when locking mErrorStack.
+  std::mutex mStackLock;
+
+  // Reference count of the class. Destruction of the class when mRefCount
+  // is not zero causes undefined behavior.
+  std::atomic<int32_t> mRefCount{0};
+
+  // The error stack that holds the errors recorded by TensorRT.
+  errorStack mErrorStack;
+}; // class SampleErrorRecorder
+#endif // ERROR_RECORDER_H
diff --git a/fastdeploy/backends/tensorrt/common/README.md b/fastdeploy/backends/tensorrt/common/README.md
new file mode 100644
index 0000000000..0ed86b17a5
--- /dev/null
+++ b/fastdeploy/backends/tensorrt/common/README.md
@@ -0,0 +1 @@
+The code in this directory comes from https://github.com/NVIDIA/TensorRT
diff --git a/fastdeploy/backends/tensorrt/common/argsParser.h b/fastdeploy/backends/tensorrt/common/argsParser.h
new file mode 100644
index 0000000000..e2e1b1e95f
--- /dev/null
+++ b/fastdeploy/backends/tensorrt/common/argsParser.h
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef TENSORRT_ARGS_PARSER_H
+#define TENSORRT_ARGS_PARSER_H
+
+#include <string>
+#include <vector>
+#ifdef _MSC_VER
+#include ".\windows\getopt.h"
+#else
+#include <getopt.h>
+#endif
+#include <iostream>
+
+namespace samplesCommon {
+
+//!
+//! \brief The SampleParams structure groups the basic parameters required by
+//!        all sample networks.
+//!
+struct SampleParams {
+  int32_t batchSize{1}; //!< Number of inputs in a batch
+  int32_t dlaCore{-1};  //!< Specify the DLA core to run network on.
+  bool int8{false};     //!< Allow running the network in Int8 mode.
+  bool fp16{false};     //!< Allow running the network in FP16 mode.
+  std::vector<std::string>
+      dataDirs; //!< Directory paths where sample data files are stored
+  std::vector<std::string> inputTensorNames;
+  std::vector<std::string> outputTensorNames;
+};
+
+//!
+//! \brief The CaffeSampleParams structure groups the additional parameters
+//!        required by networks that use caffe
+//!
+struct CaffeSampleParams : public SampleParams {
+  std::string
+      prototxtFileName; //!< Filename of prototxt design file of a network
+  std::string
+      weightsFileName;      //!< Filename of trained weights file of a network
+  std::string meanFileName; //!< Filename of mean file of a network
+};
+
+//!
+//! \brief The OnnxSampleParams structure groups the additional parameters
+//!        required by networks that use ONNX
+//!
+struct OnnxSampleParams : public SampleParams {
+  std::string onnxFileName; //!< Filename of ONNX file of a network
+};
+
+//!
+//! \brief The UffSampleParams structure groups the additional parameters
+//!        required by networks that use Uff
+//!
+struct UffSampleParams : public SampleParams {
+  std::string uffFileName; //!< Filename of uff file of a network
+};
+
+//!
+//! \brief Struct to maintain command-line arguments.
+//!
+struct Args {
+  bool runInInt8{false};
+  bool runInFp16{false};
+  bool help{false};
+  int32_t useDLACore{-1};
+  int32_t batch{1};
+  std::vector<std::string> dataDirs;
+  std::string saveEngine;
+  std::string loadEngine;
+  bool useILoop{false};
+};
+
+//!
+//! \brief Populates the Args struct with the provided command-line parameters.
+//!
+//! \throw invalid_argument if any of the arguments are not valid
+//!
+//! \return boolean If return value is true, execution can continue, otherwise
+//!         program should exit
+//!
+inline bool parseArgs(Args& args, int32_t argc, char* argv[]) {
+  while (1) {
+    int32_t arg;
+    static struct option long_options[] = {
+        {"help", no_argument, 0, 'h'},
+        {"datadir", required_argument, 0, 'd'},
+        {"int8", no_argument, 0, 'i'},
+        {"fp16", no_argument, 0, 'f'},
+        {"useILoop", no_argument, 0, 'l'},
+        {"saveEngine", required_argument, 0, 's'},
+        {"loadEngine", no_argument, 0, 'o'},
+        {"useDLACore", required_argument, 0, 'u'},
+        {"batch", required_argument, 0, 'b'},
+        {nullptr, 0, nullptr, 0}};
+    int32_t option_index = 0;
+    arg = getopt_long(argc, argv, "hd:iu", long_options, &option_index);
+    if (arg == -1) {
+      break;
+    }
+
+    switch (arg) {
+    case 'h':
+      args.help = true;
+      return true;
+    case 'd':
+      if (optarg) {
+        args.dataDirs.push_back(optarg);
+      } else {
+        std::cerr << "ERROR: --datadir requires option argument" << std::endl;
+        return false;
+      }
+      break;
+    case 's':
+      if (optarg) {
+        args.saveEngine = optarg;
+      }
+      break;
+    case 'o':
+      if (optarg) {
+        args.loadEngine = optarg;
+      }
+      break;
+    case 'i':
+      args.runInInt8 = true;
+      break;
+    case 'f':
+      args.runInFp16 = true;
+      break;
+    case 'l':
+      args.useILoop = true;
+      break;
+    case 'u':
+      if (optarg) {
+        args.useDLACore = std::stoi(optarg);
+      }
+      break;
+    case 'b':
+      if (optarg) {
+        args.batch = std::stoi(optarg);
+      }
+      break;
+    default:
+      return false;
+    }
+  }
+  return true;
+}
+
+} // namespace samplesCommon
+
+#endif // TENSORRT_ARGS_PARSER_H
diff --git a/fastdeploy/backends/tensorrt/common/buffers.h b/fastdeploy/backends/tensorrt/common/buffers.h
new file mode 100644
index 0000000000..8061ee33d1
--- /dev/null
+++ b/fastdeploy/backends/tensorrt/common/buffers.h
@@ -0,0 +1,426 @@
+/*
+ * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef TENSORRT_BUFFERS_H
+#define TENSORRT_BUFFERS_H
+
+#include "NvInfer.h"
+#include "common.h"
+#include "half.h"
+#include <cassert>
+#include <cuda_runtime_api.h>
+#include <iostream>
+#include <iterator>
+#include <memory>
+#include <new>
+#include <numeric>
+#include <string>
+#include <vector>
+
+namespace samplesCommon {
+
+//!
+//! \brief The GenericBuffer class is a templated class for buffers.
+//!
+//! \details This templated RAII (Resource Acquisition Is Initialization) class
+//!          handles the allocation, deallocation, querying of buffers on both
+//!          the device and the host. It can handle data of arbitrary types
+//!          because it stores byte buffers. The template parameters AllocFunc
+//!          and FreeFunc are used for the allocation and deallocation of the
+//!          buffer. AllocFunc must be a functor that takes in (void** ptr,
+//!          size_t size) and returns bool. ptr is a pointer to where the
+//!          allocated buffer address should be stored. size is the amount of
+//!          memory in bytes to allocate. The boolean indicates whether or not
+//!          the memory allocation was successful. FreeFunc must be a functor
+//!          that takes in (void* ptr) and returns void. ptr is the allocated
+//!          buffer address. It must work with nullptr input.
+//!
+template <typename AllocFunc, typename FreeFunc> class GenericBuffer {
+ public:
+  //!
+  //! \brief Construct an empty buffer.
+  //!
+  GenericBuffer(nvinfer1::DataType type = nvinfer1::DataType::kFLOAT)
+      : mSize(0), mCapacity(0), mType(type), mBuffer(nullptr) {}
+
+  //!
+  //! \brief Construct a buffer with the specified allocation size in bytes.
+  //!
+  GenericBuffer(size_t size, nvinfer1::DataType type)
+      : mSize(size), mCapacity(size), mType(type) {
+    if (!allocFn(&mBuffer, this->nbBytes())) {
+      throw std::bad_alloc();
+    }
+  }
+
+  GenericBuffer(GenericBuffer&& buf)
+      : mSize(buf.mSize), mCapacity(buf.mCapacity), mType(buf.mType),
+        mBuffer(buf.mBuffer) {
+    buf.mSize = 0;
+    buf.mCapacity = 0;
+    buf.mType = nvinfer1::DataType::kFLOAT;
+    buf.mBuffer = nullptr;
+  }
+
+  GenericBuffer& operator=(GenericBuffer&& buf) {
+    if (this != &buf) {
+      freeFn(mBuffer);
+      mSize = buf.mSize;
+      mCapacity = buf.mCapacity;
+      mType = buf.mType;
+      mBuffer = buf.mBuffer;
+      // Reset buf.
+      buf.mSize = 0;
+      buf.mCapacity = 0;
+      buf.mBuffer = nullptr;
+    }
+    return *this;
+  }
+
+  //!
+  //! \brief Returns pointer to underlying array.
+  //!
+  void* data() { return mBuffer; }
+
+  //!
+  //! \brief Returns pointer to underlying array.
+  //!
+  const void* data() const { return mBuffer; }
+
+  //!
+  //! \brief Returns the size (in number of elements) of the buffer.
+  //!
+  size_t size() const { return mSize; }
+
+  //!
+  //! \brief Returns the size (in bytes) of the buffer.
+  //!
+  size_t nbBytes() const {
+    return this->size() * samplesCommon::getElementSize(mType);
+  }
+
+  //!
+  //! \brief Resizes the buffer. This is a no-op if the new size is smaller
+  //!        than or equal to the current capacity.
+  //!
+  void resize(size_t newSize) {
+    mSize = newSize;
+    if (mCapacity < newSize) {
+      freeFn(mBuffer);
+      if (!allocFn(&mBuffer, this->nbBytes())) {
+        throw std::bad_alloc{};
+      }
+      mCapacity = newSize;
+    }
+  }
+
+  //!
+  //! \brief Overload of resize that accepts Dims
+  //!
+  void resize(const nvinfer1::Dims& dims) {
+    return this->resize(samplesCommon::volume(dims));
+  }
+
+  ~GenericBuffer() { freeFn(mBuffer); }
+
+ private:
+  size_t mSize{0}, mCapacity{0};
+  nvinfer1::DataType mType;
+  void* mBuffer;
+  AllocFunc allocFn;
+  FreeFunc freeFn;
+};
+
+class DeviceAllocator {
+ public:
+  bool operator()(void** ptr, size_t size) const {
+    return cudaMalloc(ptr, size) == cudaSuccess;
+  }
+};
+
+class DeviceFree {
+ public:
+  void operator()(void* ptr) const { cudaFree(ptr); }
+};
+
+class HostAllocator {
+ public:
+  bool operator()(void** ptr, size_t size) const {
+    *ptr = malloc(size);
+    return *ptr != nullptr;
+  }
+};
+
+class HostFree {
+ public:
+  void operator()(void* ptr) const { free(ptr); }
+};
+
+using DeviceBuffer = GenericBuffer<DeviceAllocator, DeviceFree>;
+using HostBuffer = GenericBuffer<HostAllocator, HostFree>;
+
+//!
+//! \brief The ManagedBuffer class groups together a pair of corresponding
+//!        device and host buffers.
+//!
+class ManagedBuffer {
+ public:
+  DeviceBuffer deviceBuffer;
+  HostBuffer hostBuffer;
+};
+
+//!
+//! \brief The BufferManager class handles host and device buffer allocation
+//!        and deallocation.
+//!
+//! \details This RAII class handles host and device buffer allocation and
+//!          deallocation, memcpy between host and device buffers to aid with
+//!          inference, and debugging dumps to validate inference. The
+//!          BufferManager class is meant to be used to simplify buffer
+//!          management and any interactions between buffers and the engine.
+//!
+class BufferManager {
+ public:
+  static const size_t kINVALID_SIZE_VALUE = ~size_t(0);
+
+  //!
+  //! \brief Create a BufferManager for handling buffer interactions with
+  //!        engine.
+  //!
+  BufferManager(std::shared_ptr<nvinfer1::ICudaEngine> engine,
+                const int batchSize = 0,
+                const nvinfer1::IExecutionContext* context = nullptr)
+      : mEngine(engine), mBatchSize(batchSize) {
+    // Full Dims implies no batch size.
+    assert(engine->hasImplicitBatchDimension() || mBatchSize == 0);
+    // Create host and device buffers
+    for (int i = 0; i < mEngine->getNbBindings(); i++) {
+      auto dims = context ? context->getBindingDimensions(i)
+                          : mEngine->getBindingDimensions(i);
+      size_t vol =
+          context || !mBatchSize ? 1 : static_cast<size_t>(mBatchSize);
+      nvinfer1::DataType type = mEngine->getBindingDataType(i);
+      int vecDim = mEngine->getBindingVectorizedDim(i);
+      if (-1 != vecDim) // i.e., 0 != lgScalarsPerVector
+      {
+        int scalarsPerVec = mEngine->getBindingComponentsPerElement(i);
+        dims.d[vecDim] = divUp(dims.d[vecDim], scalarsPerVec);
+        vol *= scalarsPerVec;
+      }
+      vol *= samplesCommon::volume(dims);
+      std::unique_ptr<ManagedBuffer> manBuf{new ManagedBuffer()};
+      manBuf->deviceBuffer = DeviceBuffer(vol, type);
+      manBuf->hostBuffer = HostBuffer(vol, type);
+      mDeviceBindings.emplace_back(manBuf->deviceBuffer.data());
+      mManagedBuffers.emplace_back(std::move(manBuf));
+    }
+  }
+
+  //!
+  //! \brief Returns a vector of device buffers that you can use directly as
+  //!        bindings for the execute and enqueue methods of IExecutionContext.
+  //!
+  std::vector<void*>& getDeviceBindings() { return mDeviceBindings; }
+
+  //!
+  //! \brief Returns a vector of device buffers.
+  //!
+  const std::vector<void*>& getDeviceBindings() const {
+    return mDeviceBindings;
+  }
+
+  //!
+  //! \brief Returns the device buffer corresponding to tensorName.
+  //!        Returns nullptr if no such tensor can be found.
+  //!
+  void* getDeviceBuffer(const std::string& tensorName) const {
+    return getBuffer(false, tensorName);
+  }
+
+  //!
+  //! \brief Returns the host buffer corresponding to tensorName.
+  //!        Returns nullptr if no such tensor can be found.
+  //!
+  void* getHostBuffer(const std::string& tensorName) const {
+    return getBuffer(true, tensorName);
+  }
+
+  //!
+  //! \brief Returns the size of the host and device buffers that correspond to
+  //!        tensorName. Returns kINVALID_SIZE_VALUE if no such tensor can be
+  //!        found.
+  //!
+  size_t size(const std::string& tensorName) const {
+    int index = mEngine->getBindingIndex(tensorName.c_str());
+    if (index == -1)
+      return kINVALID_SIZE_VALUE;
+    return mManagedBuffers[index]->hostBuffer.nbBytes();
+  }
+
+  //!
+  //! \brief Dump host buffer with specified tensorName to ostream.
+  //!        Prints error message to std::ostream if no such tensor can be
+  //!        found.
+  //!
+  void dumpBuffer(std::ostream& os, const std::string& tensorName) {
+    int index = mEngine->getBindingIndex(tensorName.c_str());
+    if (index == -1) {
+      os << "Invalid tensor name" << std::endl;
+      return;
+    }
+    void* buf = mManagedBuffers[index]->hostBuffer.data();
+    size_t bufSize = mManagedBuffers[index]->hostBuffer.nbBytes();
+    nvinfer1::Dims bufDims = mEngine->getBindingDimensions(index);
+    size_t rowCount = static_cast<size_t>(
+        bufDims.nbDims > 0 ? bufDims.d[bufDims.nbDims - 1] : mBatchSize);
+
+  //!
+  //! \brief Returns the device buffer corresponding to tensorName.
+  //! Returns nullptr if no such tensor can be found.
+  //!
+  void* getDeviceBuffer(const std::string& tensorName) const {
+    return getBuffer(false, tensorName);
+  }
+
+  //!
+  //! \brief Returns the host buffer corresponding to tensorName.
+  //! Returns nullptr if no such tensor can be found.
+  //!
+  void* getHostBuffer(const std::string& tensorName) const {
+    return getBuffer(true, tensorName);
+  }
+
+  //!
+  //! \brief Returns the size of the host and device buffers that correspond
+  //! to tensorName.
+  //! Returns kINVALID_SIZE_VALUE if no such tensor can be found.
+  //!
+  size_t size(const std::string& tensorName) const {
+    int index = mEngine->getBindingIndex(tensorName.c_str());
+    if (index == -1)
+      return kINVALID_SIZE_VALUE;
+    return mManagedBuffers[index]->hostBuffer.nbBytes();
+  }
+
+  //!
+  //! \brief Dump host buffer with specified tensorName to ostream.
+  //! Prints error message to std::ostream if no such tensor can be
+  //! found.
+  //!
+  void dumpBuffer(std::ostream& os, const std::string& tensorName) {
+    int index = mEngine->getBindingIndex(tensorName.c_str());
+    if (index == -1) {
+      os << "Invalid tensor name" << std::endl;
+      return;
+    }
+    void* buf = mManagedBuffers[index]->hostBuffer.data();
+    size_t bufSize = mManagedBuffers[index]->hostBuffer.nbBytes();
+    nvinfer1::Dims bufDims = mEngine->getBindingDimensions(index);
+    size_t rowCount = static_cast<size_t>(
+        bufDims.nbDims > 0 ? bufDims.d[bufDims.nbDims - 1] : mBatchSize);
+    int leadDim = mBatchSize;
+    int* trailDims = bufDims.d;
+    int nbDims = bufDims.nbDims;
+
+    // Fix explicit Dimension networks
+    if (!leadDim && nbDims > 0) {
+      leadDim = bufDims.d[0];
+      ++trailDims;
+      --nbDims;
+    }
+
+    os << "[" << leadDim;
+    for (int i = 0; i < nbDims; i++)
+      os << ", " << trailDims[i];
+    os << "]" << std::endl;
+    switch (mEngine->getBindingDataType(index)) {
+    case nvinfer1::DataType::kINT32:
+      print<int32_t>(os, buf, bufSize, rowCount);
+      break;
+    case nvinfer1::DataType::kFLOAT:
+      print<float>(os, buf, bufSize, rowCount);
+      break;
+    case nvinfer1::DataType::kHALF:
+      print<half_float::half>(os, buf, bufSize, rowCount);
+      break;
+    case nvinfer1::DataType::kINT8:
+      assert(0 && "Int8 network-level input and output is not supported");
+      break;
+    case nvinfer1::DataType::kBOOL:
+      assert(0 && "Bool network-level input and output are not supported");
+      break;
+    }
+  }
+
+  //!
+  //! \brief Templated print function that dumps buffers of arbitrary type to
+  //! std::ostream.
+  //! rowCount parameter controls how many elements are on each line.
+  //! A rowCount of 1 means that there is only 1 element on each line.
+  //!
+  template <typename T>
+  void print(std::ostream& os, void* buf, size_t bufSize, size_t rowCount) {
+    assert(rowCount != 0);
+    assert(bufSize % sizeof(T) == 0);
+    T* typedBuf = static_cast<T*>(buf);
+    size_t numItems = bufSize / sizeof(T);
+    for (int i = 0; i < static_cast<int>(numItems); i++) {
+      // Handle rowCount == 1 case
+      if (rowCount == 1 && i != static_cast<int>(numItems) - 1)
+        os << typedBuf[i] << std::endl;
+      else if (rowCount == 1)
+        os << typedBuf[i];
+      // Handle rowCount > 1 case
+      else if (i % rowCount == 0)
+        os << typedBuf[i];
+      else if (i % rowCount == rowCount - 1)
+        os << " " << typedBuf[i] << std::endl;
+      else
+        os << " " << typedBuf[i];
+    }
+  }
+
+  //!
+  //! \brief Copy the contents of input host buffers to input device buffers
+  //! synchronously.
+  //!
+  void copyInputToDevice() { memcpyBuffers(true, false, false); }
+
+  //!
+  //! \brief Copy the contents of output device buffers to output host buffers
+  //! synchronously.
+  //!
+  void copyOutputToHost() { memcpyBuffers(false, true, false); }
+
+  //!
+  //! \brief Copy the contents of input host buffers to input device buffers
+  //! asynchronously.
+  //!
+  void copyInputToDeviceAsync(const cudaStream_t& stream = 0) {
+    memcpyBuffers(true, false, true, stream);
+  }
+
+  //!
+  //! \brief Copy the contents of output device buffers to output host buffers
+  //! asynchronously.
+  //!
+  void copyOutputToHostAsync(const cudaStream_t& stream = 0) {
+    memcpyBuffers(false, true, true, stream);
+  }
+
+  ~BufferManager() = default;
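+
+  // Sketch of a full asynchronous round trip (`stream` and `context` are
+  // hypothetical; the async copies and enqueueV2 must be synchronized before
+  // reading outputs on the host):
+  //
+  //   buffers.copyInputToDeviceAsync(stream);
+  //   context->enqueueV2(buffers.getDeviceBindings().data(), stream, nullptr);
+  //   buffers.copyOutputToHostAsync(stream);
+  //   CHECK(cudaStreamSynchronize(stream));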
+
+ private:
+  void* getBuffer(const bool isHost, const std::string& tensorName) const {
+    int index = mEngine->getBindingIndex(tensorName.c_str());
+    if (index == -1)
+      return nullptr;
+    return (isHost ? mManagedBuffers[index]->hostBuffer.data()
+                   : mManagedBuffers[index]->deviceBuffer.data());
+  }
+
+  void memcpyBuffers(const bool copyInput, const bool deviceToHost,
+                     const bool async, const cudaStream_t& stream = 0) {
+    for (int i = 0; i < mEngine->getNbBindings(); i++) {
+      void* dstPtr = deviceToHost ? mManagedBuffers[i]->hostBuffer.data()
+                                  : mManagedBuffers[i]->deviceBuffer.data();
+      const void* srcPtr = deviceToHost
+                               ? mManagedBuffers[i]->deviceBuffer.data()
+                               : mManagedBuffers[i]->hostBuffer.data();
+      const size_t byteSize = mManagedBuffers[i]->hostBuffer.nbBytes();
+      const cudaMemcpyKind memcpyType =
+          deviceToHost ? cudaMemcpyDeviceToHost : cudaMemcpyHostToDevice;
+      if ((copyInput && mEngine->bindingIsInput(i)) ||
+          (!copyInput && !mEngine->bindingIsInput(i))) {
+        if (async)
+          CHECK(cudaMemcpyAsync(dstPtr, srcPtr, byteSize, memcpyType, stream));
+        else
+          CHECK(cudaMemcpy(dstPtr, srcPtr, byteSize, memcpyType));
+      }
+    }
+  }
+
+  std::shared_ptr<nvinfer1::ICudaEngine> mEngine;  //!< The pointer to the engine
+  int mBatchSize;  //!< The batch size for legacy networks, 0 otherwise.
+  std::vector<std::unique_ptr<ManagedBuffer>>
+      mManagedBuffers;  //!< The vector of pointers to managed buffers
+  std::vector<void*> mDeviceBindings;  //!< The vector of device buffers needed
+                                       //!< for engine execution
+};
+
+} // namespace samplesCommon
+
+#endif // TENSORRT_BUFFERS_H
diff --git a/fastdeploy/backends/tensorrt/common/common.h b/fastdeploy/backends/tensorrt/common/common.h
new file mode 100644
index 0000000000..ad3af72a2b
--- /dev/null
+++ b/fastdeploy/backends/tensorrt/common/common.h
@@ -0,0 +1,844 @@
+/*
+ * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TENSORRT_COMMON_H
+#define TENSORRT_COMMON_H
+
+// For loadLibrary
+#ifdef _MSC_VER
+// Needed so that the max/min definitions in windows.h do not conflict with
+// std::max/min.
+#define NOMINMAX
+#include <windows.h>
+#undef NOMINMAX
+#else
+#include <dlfcn.h>
+#endif
+
+#include "NvInfer.h"
+#include "NvInferPlugin.h"
+#include "logger.h"
+#include <algorithm>
+#include <cassert>
+#include <chrono>
+#include <cmath>
+#include <cstring>
+#include <cuda_runtime_api.h>
+#include <fstream>
+#include <iomanip>
+#include <iostream>
+#include <iterator>
+#include <map>
+#include <memory>
+#include <new>
+#include <numeric>
+#include <ratio>
+#include <sstream>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "safeCommon.h"
+
+using namespace nvinfer1;
+using namespace plugin;
+
+#ifdef _MSC_VER
+#define FN_NAME __FUNCTION__
+#else
+#define FN_NAME __func__
+#endif
+
+#if defined(__aarch64__) || defined(__QNX__)
+#define ENABLE_DLA_API 1
+#endif
+
+#define CHECK_RETURN_W_MSG(status, val, errMsg)                                \
+  do {                                                                         \
+    if (!(status)) {                                                           \
+      sample::gLogError << errMsg << " Error in " << __FILE__ << ", function " \
+                        << FN_NAME << "(), line " << __LINE__ << std::endl;    \
+      return val;                                                              \
+    }                                                                          \
+  } while (0)
+
+#undef ASSERT
+#define ASSERT(condition)                                                      \
+  do {                                                                         \
+    if (!(condition)) {                                                        \
+      sample::gLogError << "Assertion failure: " << #condition << std::endl;   \
+      abort();                                                                 \
+    }                                                                          \
+  } while (0)
+
+#define CHECK_RETURN(status, val) CHECK_RETURN_W_MSG(status, val, "")
+
+#define OBJ_GUARD(A) std::unique_ptr<A, void (*)(A*)>
+
+template <typename T, typename T_> OBJ_GUARD(T) makeObjGuard(T_* t) {
+  CHECK(!(std::is_base_of<T, T_>::value || std::is_same<T, T_>::value));
+  auto deleter = [](T* t) { t->destroy(); };
+  return std::unique_ptr<T, void (*)(T*)>{static_cast<T*>(t), deleter};
+}
+
+constexpr long double operator"" _GiB(long double val) {
+  return val * (1 << 30);
+}
+constexpr long double operator"" _MiB(long double val) {
+  return val * (1 << 20);
+}
+constexpr long double operator"" _KiB(long double val) {
+  return val * (1 << 10);
+}
+
+// These are necessary if we want to be able to write 1_GiB instead of 1.0_GiB.
+// Since the return type is signed, -1_GiB will work as expected.
+constexpr long long int operator"" _GiB(unsigned long long val) { + return val * (1 << 30); +} +constexpr long long int operator"" _MiB(unsigned long long val) { + return val * (1 << 20); +} +constexpr long long int operator"" _KiB(unsigned long long val) { + return val * (1 << 10); +} + +struct SimpleProfiler : public nvinfer1::IProfiler { + struct Record { + float time{0}; + int count{0}; + }; + + virtual void reportLayerTime(const char* layerName, float ms) noexcept { + mProfile[layerName].count++; + mProfile[layerName].time += ms; + if (std::find(mLayerNames.begin(), mLayerNames.end(), layerName) == + mLayerNames.end()) { + mLayerNames.push_back(layerName); + } + } + + SimpleProfiler(const char* name, + const std::vector& srcProfilers = + std::vector()) + : mName(name) { + for (const auto& srcProfiler : srcProfilers) { + for (const auto& rec : srcProfiler.mProfile) { + auto it = mProfile.find(rec.first); + if (it == mProfile.end()) { + mProfile.insert(rec); + } else { + it->second.time += rec.second.time; + it->second.count += rec.second.count; + } + } + } + } + + friend std::ostream& operator<<(std::ostream& out, + const SimpleProfiler& value) { + out << "========== " << value.mName << " profile ==========" << std::endl; + float totalTime = 0; + std::string layerNameStr = "TensorRT layer name"; + int maxLayerNameLength = + std::max(static_cast(layerNameStr.size()), 70); + for (const auto& elem : value.mProfile) { + totalTime += elem.second.time; + maxLayerNameLength = + std::max(maxLayerNameLength, static_cast(elem.first.size())); + } + + auto old_settings = out.flags(); + auto old_precision = out.precision(); + // Output header + { + out << std::setw(maxLayerNameLength) << layerNameStr << " "; + out << std::setw(12) << "Runtime, " + << "%" + << " "; + out << std::setw(12) << "Invocations" + << " "; + out << std::setw(12) << "Runtime, ms" << std::endl; + } + for (size_t i = 0; i < value.mLayerNames.size(); i++) { + const std::string layerName = value.mLayerNames[i]; + auto elem = value.mProfile.at(layerName); + out << std::setw(maxLayerNameLength) << layerName << " "; + out << std::setw(12) << std::fixed << std::setprecision(1) + << (elem.time * 100.0F / totalTime) << "%" + << " "; + out << std::setw(12) << elem.count << " "; + out << std::setw(12) << std::fixed << std::setprecision(2) << elem.time + << std::endl; + } + out.flags(old_settings); + out.precision(old_precision); + out << "========== " << value.mName << " total runtime = " << totalTime + << " ms ==========" << std::endl; + + return out; + } + + private: + std::string mName; + std::vector mLayerNames; + std::map mProfile; +}; + +//! Locate path to file, given its filename or filepath suffix and possible dirs +//! it might lie in. +//! Function will also walk back MAX_DEPTH dirs from CWD to check for such a +//! file path. 
+inline std::string locateFile(const std::string& filepathSuffix, + const std::vector& directories, + bool reportError = true) { + const int MAX_DEPTH{10}; + bool found{false}; + std::string filepath; + + for (auto& dir : directories) { + if (!dir.empty() && dir.back() != '/') { +#ifdef _MSC_VER + filepath = dir + "\\" + filepathSuffix; +#else + filepath = dir + "/" + filepathSuffix; +#endif + } else { + filepath = dir + filepathSuffix; + } + + for (int i = 0; i < MAX_DEPTH && !found; i++) { + const std::ifstream checkFile(filepath); + found = checkFile.is_open(); + if (found) { + break; + } + + filepath = "../" + filepath; // Try again in parent dir + } + + if (found) { + break; + } + + filepath.clear(); + } + + // Could not find the file + if (filepath.empty()) { + const std::string dirList = std::accumulate( + directories.begin() + 1, directories.end(), directories.front(), + [](const std::string& a, const std::string& b) { + return a + "\n\t" + b; + }); + std::cout << "Could not find " << filepathSuffix + << " in data directories:\n\t" << dirList << std::endl; + + if (reportError) { + std::cout << "&&&& FAILED" << std::endl; + exit(EXIT_FAILURE); + } + } + + return filepath; +} + +inline void readPGMFile(const std::string& fileName, uint8_t* buffer, int inH, + int inW) { + std::ifstream infile(fileName, std::ifstream::binary); + assert(infile.is_open() && + "Attempting to read from a file that is not open."); + std::string magic, h, w, max; + infile >> magic >> h >> w >> max; + infile.seekg(1, infile.cur); + infile.read(reinterpret_cast(buffer), inH * inW); +} + +namespace samplesCommon { + +// Swaps endianness of an integral type. +template ::value, int>::type = 0> +inline T swapEndianness(const T& value) { + uint8_t bytes[sizeof(T)]; + for (int i = 0; i < static_cast(sizeof(T)); ++i) { + bytes[sizeof(T) - 1 - i] = *(reinterpret_cast(&value) + i); + } + return *reinterpret_cast(bytes); +} + +class HostMemory { + public: + HostMemory() = delete; + virtual void* data() const noexcept { return mData; } + virtual std::size_t size() const noexcept { return mSize; } + virtual DataType type() const noexcept { return mType; } + virtual ~HostMemory() {} + + protected: + HostMemory(std::size_t size, DataType type) + : mData{nullptr}, mSize(size), mType(type) {} + void* mData; + std::size_t mSize; + DataType mType; +}; + +template +class TypedHostMemory : public HostMemory { + public: + explicit TypedHostMemory(std::size_t size) : HostMemory(size, dataType) { + mData = new ElemType[size]; + }; + ~TypedHostMemory() noexcept { delete[](ElemType*) mData; } + ElemType* raw() noexcept { return static_cast(data()); } +}; + +using FloatMemory = TypedHostMemory; +using HalfMemory = TypedHostMemory; +using ByteMemory = TypedHostMemory; + +inline void* safeCudaMalloc(size_t memSize) { + void* deviceMem; + CHECK(cudaMalloc(&deviceMem, memSize)); + if (deviceMem == nullptr) { + std::cerr << "Out of memory" << std::endl; + exit(1); + } + return deviceMem; +} + +inline bool isDebug() { return (std::getenv("TENSORRT_DEBUG") ? 
true : false); } + +struct InferDeleter { + template void operator()(T* obj) const { delete obj; } +}; + +template using SampleUniquePtr = std::unique_ptr; + +static auto StreamDeleter = [](cudaStream_t* pStream) { + if (pStream) { + cudaStreamDestroy(*pStream); + delete pStream; + } +}; + +inline std::unique_ptr makeCudaStream() { + std::unique_ptr pStream( + new cudaStream_t, StreamDeleter); + if (cudaStreamCreateWithFlags(pStream.get(), cudaStreamNonBlocking) != + cudaSuccess) { + pStream.reset(nullptr); + } + + return pStream; +} + +//! Return vector of indices that puts magnitudes of sequence in descending +//! order. +template +std::vector argMagnitudeSort(Iter begin, Iter end) { + std::vector indices(end - begin); + std::iota(indices.begin(), indices.end(), 0); + std::sort(indices.begin(), indices.end(), [&begin](size_t i, size_t j) { + return std::abs(begin[j]) < std::abs(begin[i]); + }); + return indices; +} + +inline bool readReferenceFile(const std::string& fileName, + std::vector& refVector) { + std::ifstream infile(fileName); + if (!infile.is_open()) { + std::cout << "ERROR: readReferenceFile: Attempting to read from a file " + "that is not open." + << std::endl; + return false; + } + std::string line; + while (std::getline(infile, line)) { + if (line.empty()) + continue; + refVector.push_back(line); + } + infile.close(); + return true; +} + +template +std::vector classify(const std::vector& refVector, + const std::vector& output, + const size_t topK) { + const auto inds = + samplesCommon::argMagnitudeSort(output.cbegin(), output.cend()); + std::vector result; + result.reserve(topK); + for (size_t k = 0; k < topK; ++k) { + result.push_back(refVector[inds[k]]); + } + return result; +} + +// Returns indices of highest K magnitudes in v. +template +std::vector topKMagnitudes(const std::vector& v, const size_t k) { + std::vector indices = + samplesCommon::argMagnitudeSort(v.cbegin(), v.cend()); + indices.resize(k); + return indices; +} + +template +bool readASCIIFile(const std::string& fileName, const size_t size, + std::vector& out) { + std::ifstream infile(fileName); + if (!infile.is_open()) { + std::cout << "ERROR readASCIIFile: Attempting to read from a file that is " + "not open." + << std::endl; + return false; + } + out.clear(); + out.reserve(size); + out.assign(std::istream_iterator(infile), std::istream_iterator()); + infile.close(); + return true; +} + +template +bool writeASCIIFile(const std::string& fileName, const std::vector& in) { + std::ofstream outfile(fileName); + if (!outfile.is_open()) { + std::cout << "ERROR: writeASCIIFile: Attempting to write to a file that is " + "not open." + << std::endl; + return false; + } + for (auto fn : in) { + outfile << fn << "\n"; + } + outfile.close(); + return true; +} + +inline void print_version() { + std::cout << " TensorRT version: " << NV_TENSORRT_MAJOR << "." + << NV_TENSORRT_MINOR << "." << NV_TENSORRT_PATCH << "." + << NV_TENSORRT_BUILD << std::endl; +} + +inline std::string getFileType(const std::string& filepath) { + return filepath.substr(filepath.find_last_of(".") + 1); +} + +inline std::string toLower(const std::string& inp) { + std::string out = inp; + std::transform(out.begin(), out.end(), out.begin(), ::tolower); + return out; +} + +inline float getMaxValue(const float* buffer, int64_t size) { + assert(buffer != nullptr); + assert(size > 0); + return *std::max_element(buffer, buffer + size); +} + +// Ensures that every tensor used by a network has a dynamic range set. 
+//
+// All tensors in a network must have a dynamic range specified if a calibrator
+// is not used.
+// This function is just a utility to globally fill in missing scales and
+// zero-points for the entire network.
+//
+// If a tensor does not have a dynamic range set, it is assigned inRange or
+// outRange as follows:
+//
+// * If the tensor is the input to a layer or output of a pooling node, its
+//   dynamic range is derived from inRange.
+// * Otherwise its dynamic range is derived from outRange.
+//
+// The default parameter values are intended to demonstrate, for final layers
+// in the network, cases where dynamic ranges are asymmetric.
+//
+// The default parameter values were chosen arbitrarily. Range values should
+// be chosen such that we avoid underflow or overflow. Also, range values
+// should be nonzero to avoid a uniform zero-scale tensor.
+inline void setAllDynamicRanges(INetworkDefinition* network,
+                                float inRange = 2.0f, float outRange = 4.0f) {
+  // Ensure that all layer inputs have a scale.
+  for (int i = 0; i < network->getNbLayers(); i++) {
+    auto layer = network->getLayer(i);
+    for (int j = 0; j < layer->getNbInputs(); j++) {
+      ITensor* input{layer->getInput(j)};
+      // Optional inputs are nullptr here and are from RNN layers.
+      if (input != nullptr && !input->dynamicRangeIsSet()) {
+        ASSERT(input->setDynamicRange(-inRange, inRange));
+      }
+    }
+  }
+
+  // Ensure that all layer outputs have a scale.
+  // Tensors that are also inputs to layers are ignored here
+  // since the previous loop nest assigned scales to them.
+  for (int i = 0; i < network->getNbLayers(); i++) {
+    auto layer = network->getLayer(i);
+    for (int j = 0; j < layer->getNbOutputs(); j++) {
+      ITensor* output{layer->getOutput(j)};
+      // Optional outputs are nullptr here and are from RNN layers.
+      if (output != nullptr && !output->dynamicRangeIsSet()) {
+        // Pooling must have the same input and output scales.
+        if (layer->getType() == LayerType::kPOOLING) {
+          ASSERT(output->setDynamicRange(-inRange, inRange));
+        } else {
+          ASSERT(output->setDynamicRange(-outRange, outRange));
+        }
+      }
+    }
+  }
+}
+
+inline void setDummyInt8DynamicRanges(const IBuilderConfig* c,
+                                      INetworkDefinition* n) {
+  // Set dummy per-tensor dynamic range if Int8 mode is requested.
+  if (c->getFlag(BuilderFlag::kINT8)) {
+    sample::gLogWarning << "Int8 calibrator not provided. Generating dummy "
+                           "per-tensor dynamic range. Int8 accuracy is not "
+                           "guaranteed."
+                        << std::endl;
+    setAllDynamicRanges(n);
+  }
+}
+
+inline void enableDLA(IBuilder* builder, IBuilderConfig* config, int useDLACore,
+                      bool allowGPUFallback = true) {
+  if (useDLACore >= 0) {
+    if (builder->getNbDLACores() == 0) {
+      std::cerr << "Trying to use DLA core " << useDLACore
+                << " on a platform that doesn't have any DLA cores"
+                << std::endl;
+      assert("Error: use DLA core on a platform that doesn't have any DLA "
+             "cores" &&
+             false);
+    }
+    if (allowGPUFallback) {
+      config->setFlag(BuilderFlag::kGPU_FALLBACK);
+    }
+    if (!config->getFlag(BuilderFlag::kINT8)) {
+      // User has not requested INT8 Mode.
+      // By default run in FP16 mode. FP32 mode is not permitted.
+ config->setFlag(BuilderFlag::kFP16); + } + config->setDefaultDeviceType(DeviceType::kDLA); + config->setDLACore(useDLACore); + } +} + +inline int32_t parseDLA(int32_t argc, char** argv) { + for (int32_t i = 1; i < argc; i++) { + if (strncmp(argv[i], "--useDLACore=", 13) == 0) { + return std::stoi(argv[i] + 13); + } + } + return -1; +} + +inline uint32_t getElementSize(nvinfer1::DataType t) noexcept { + switch (t) { + case nvinfer1::DataType::kINT32: + return 4; + case nvinfer1::DataType::kFLOAT: + return 4; + case nvinfer1::DataType::kHALF: + return 2; + case nvinfer1::DataType::kBOOL: + case nvinfer1::DataType::kINT8: + return 1; + } + return 0; +} + +inline int64_t volume(const nvinfer1::Dims& d) { + return std::accumulate(d.d, d.d + d.nbDims, 1, std::multiplies()); +} + +template struct PPM { + std::string magic, fileName; + int h, w, max; + uint8_t buffer[C * H * W]; +}; + +// New vPPM(variable sized PPM) class with variable dimensions. +struct vPPM { + std::string magic, fileName; + int h, w, max; + std::vector buffer; +}; + +struct BBox { + float x1, y1, x2, y2; +}; + +template +void readPPMFile(const std::string& filename, + samplesCommon::PPM& ppm) { + ppm.fileName = filename; + std::ifstream infile(filename, std::ifstream::binary); + assert(infile.is_open() && + "Attempting to read from a file that is not open."); + infile >> ppm.magic >> ppm.w >> ppm.h >> ppm.max; + infile.seekg(1, infile.cur); + infile.read(reinterpret_cast(ppm.buffer), ppm.w * ppm.h * 3); +} + +inline void readPPMFile(const std::string& filename, vPPM& ppm, + std::vector& input_dir) { + ppm.fileName = filename; + std::ifstream infile(locateFile(filename, input_dir), std::ifstream::binary); + infile >> ppm.magic >> ppm.w >> ppm.h >> ppm.max; + infile.seekg(1, infile.cur); + + for (int i = 0; i < ppm.w * ppm.h * 3; ++i) { + ppm.buffer.push_back(0); + } + + infile.read(reinterpret_cast(&ppm.buffer[0]), ppm.w * ppm.h * 3); +} + +template +void writePPMFileWithBBox(const std::string& filename, PPM& ppm, + const BBox& bbox) { + std::ofstream outfile("./" + filename, std::ofstream::binary); + assert(!outfile.fail()); + outfile << "P6" + << "\n" + << ppm.w << " " << ppm.h << "\n" + << ppm.max << "\n"; + + auto round = [](float x) -> int { return int(std::floor(x + 0.5f)); }; + const int x1 = std::min(std::max(0, round(int(bbox.x1))), W - 1); + const int x2 = std::min(std::max(0, round(int(bbox.x2))), W - 1); + const int y1 = std::min(std::max(0, round(int(bbox.y1))), H - 1); + const int y2 = std::min(std::max(0, round(int(bbox.y2))), H - 1); + + for (int x = x1; x <= x2; ++x) { + // bbox top border + ppm.buffer[(y1 * ppm.w + x) * 3] = 255; + ppm.buffer[(y1 * ppm.w + x) * 3 + 1] = 0; + ppm.buffer[(y1 * ppm.w + x) * 3 + 2] = 0; + // bbox bottom border + ppm.buffer[(y2 * ppm.w + x) * 3] = 255; + ppm.buffer[(y2 * ppm.w + x) * 3 + 1] = 0; + ppm.buffer[(y2 * ppm.w + x) * 3 + 2] = 0; + } + + for (int y = y1; y <= y2; ++y) { + // bbox left border + ppm.buffer[(y * ppm.w + x1) * 3] = 255; + ppm.buffer[(y * ppm.w + x1) * 3 + 1] = 0; + ppm.buffer[(y * ppm.w + x1) * 3 + 2] = 0; + // bbox right border + ppm.buffer[(y * ppm.w + x2) * 3] = 255; + ppm.buffer[(y * ppm.w + x2) * 3 + 1] = 0; + ppm.buffer[(y * ppm.w + x2) * 3 + 2] = 0; + } + + outfile.write(reinterpret_cast(ppm.buffer), ppm.w * ppm.h * 3); +} + +inline void writePPMFileWithBBox(const std::string& filename, vPPM ppm, + std::vector& dets) { + std::ofstream outfile("./" + filename, std::ofstream::binary); + assert(!outfile.fail()); + outfile << "P6" + << "\n" + << ppm.w << 
" " << ppm.h << "\n" + << ppm.max << "\n"; + auto round = [](float x) -> int { return int(std::floor(x + 0.5f)); }; + + for (auto bbox : dets) { + for (int x = int(bbox.x1); x < int(bbox.x2); ++x) { + // bbox top border + ppm.buffer[(round(bbox.y1) * ppm.w + x) * 3] = 255; + ppm.buffer[(round(bbox.y1) * ppm.w + x) * 3 + 1] = 0; + ppm.buffer[(round(bbox.y1) * ppm.w + x) * 3 + 2] = 0; + // bbox bottom border + ppm.buffer[(round(bbox.y2) * ppm.w + x) * 3] = 255; + ppm.buffer[(round(bbox.y2) * ppm.w + x) * 3 + 1] = 0; + ppm.buffer[(round(bbox.y2) * ppm.w + x) * 3 + 2] = 0; + } + + for (int y = int(bbox.y1); y < int(bbox.y2); ++y) { + // bbox left border + ppm.buffer[(y * ppm.w + round(bbox.x1)) * 3] = 255; + ppm.buffer[(y * ppm.w + round(bbox.x1)) * 3 + 1] = 0; + ppm.buffer[(y * ppm.w + round(bbox.x1)) * 3 + 2] = 0; + // bbox right border + ppm.buffer[(y * ppm.w + round(bbox.x2)) * 3] = 255; + ppm.buffer[(y * ppm.w + round(bbox.x2)) * 3 + 1] = 0; + ppm.buffer[(y * ppm.w + round(bbox.x2)) * 3 + 2] = 0; + } + } + + outfile.write(reinterpret_cast(&ppm.buffer[0]), ppm.w * ppm.h * 3); +} + +class TimerBase { + public: + virtual void start() {} + virtual void stop() {} + float microseconds() const noexcept { return mMs * 1000.f; } + float milliseconds() const noexcept { return mMs; } + float seconds() const noexcept { return mMs / 1000.f; } + void reset() noexcept { mMs = 0.f; } + + protected: + float mMs{0.0f}; +}; + +class GpuTimer : public TimerBase { + public: + explicit GpuTimer(cudaStream_t stream) : mStream(stream) { + CHECK(cudaEventCreate(&mStart)); + CHECK(cudaEventCreate(&mStop)); + } + ~GpuTimer() { + CHECK(cudaEventDestroy(mStart)); + CHECK(cudaEventDestroy(mStop)); + } + void start() { CHECK(cudaEventRecord(mStart, mStream)); } + void stop() { + CHECK(cudaEventRecord(mStop, mStream)); + float ms{0.0f}; + CHECK(cudaEventSynchronize(mStop)); + CHECK(cudaEventElapsedTime(&ms, mStart, mStop)); + mMs += ms; + } + + private: + cudaEvent_t mStart, mStop; + cudaStream_t mStream; +}; // class GpuTimer + +template class CpuTimer : public TimerBase { + public: + using clock_type = Clock; + + void start() { mStart = Clock::now(); } + void stop() { + mStop = Clock::now(); + mMs += std::chrono::duration{mStop - mStart}.count(); + } + + private: + std::chrono::time_point mStart, mStop; +}; // class CpuTimer + +using PreciseCpuTimer = CpuTimer; + +inline std::vector splitString(std::string str, + char delimiter = ',') { + std::vector splitVect; + std::stringstream ss(str); + std::string substr; + + while (ss.good()) { + getline(ss, substr, delimiter); + splitVect.emplace_back(std::move(substr)); + } + return splitVect; +} + +// Return m rounded up to nearest multiple of n +inline int roundUp(int m, int n) { return ((m + n - 1) / n) * n; } + +inline int getC(const Dims& d) { return d.nbDims >= 3 ? d.d[d.nbDims - 3] : 1; } + +inline int getH(const Dims& d) { return d.nbDims >= 2 ? d.d[d.nbDims - 2] : 1; } + +inline int getW(const Dims& d) { return d.nbDims >= 1 ? d.d[d.nbDims - 1] : 1; } + +inline void loadLibrary(const std::string& path) { +#ifdef _MSC_VER + void* handle = LoadLibrary(path.c_str()); +#else + int32_t flags{RTLD_LAZY}; +#if ENABLE_ASAN + // https://github.com/google/sanitizers/issues/89 + // asan doesn't handle module unloading correctly and there are no plans on + // doing + // so. In order to get proper stack traces, don't delete the shared library on + // close so that asan can resolve the symbols correctly. 
+ flags |= RTLD_NODELETE; +#endif // ENABLE_ASAN + + void* handle = dlopen(path.c_str(), flags); +#endif + if (handle == nullptr) { +#ifdef _MSC_VER + sample::gLogError << "Could not load plugin library: " << path << std::endl; +#else + sample::gLogError << "Could not load plugin library: " << path + << ", due to: " << dlerror() << std::endl; +#endif + } +} + +inline int32_t getSMVersion() { + int32_t deviceIndex = 0; + CHECK(cudaGetDevice(&deviceIndex)); + + int32_t major, minor; + CHECK(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, + deviceIndex)); + CHECK(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, + deviceIndex)); + + return ((major << 8) | minor); +} + +inline bool isSMSafe() { + const int32_t smVersion = getSMVersion(); + return smVersion == 0x0700 || smVersion == 0x0702 || smVersion == 0x0705 || + smVersion == 0x0800 || smVersion == 0x0806 || smVersion == 0x0807; +} + +inline bool isDataTypeSupported(DataType dataType) { + auto builder = SampleUniquePtr( + nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger())); + if (!builder) { + return false; + } + + if ((dataType == DataType::kINT8 && !builder->platformHasFastInt8()) || + (dataType == DataType::kHALF && !builder->platformHasFastFp16())) { + return false; + } + + return true; +} + +} // namespace samplesCommon + +inline std::ostream& operator<<(std::ostream& os, const nvinfer1::Dims& dims) { + os << "("; + for (int i = 0; i < dims.nbDims; ++i) { + os << (i ? ", " : "") << dims.d[i]; + } + return os << ")"; +} + +#endif // TENSORRT_COMMON_H diff --git a/fastdeploy/backends/tensorrt/common/getOptions.cpp b/fastdeploy/backends/tensorrt/common/getOptions.cpp new file mode 100644 index 0000000000..84b06581a6 --- /dev/null +++ b/fastdeploy/backends/tensorrt/common/getOptions.cpp @@ -0,0 +1,223 @@ +/* + * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "getOptions.h" +#include "logger.h" + +#include +#include +#include +#include +#include + +namespace nvinfer1 { +namespace utility { + +//! Matching for TRTOptions is defined as follows: +//! +//! If A and B both have longName set, A matches B if and only if A.longName == +//! B.longName and (A.shortName == B.shortName if both have short name set). +//! +//! If A only has shortName set and B only has longName set, then A does not +//! match B. It is assumed that when 2 TRTOptions are compared, one of them is +//! the definition of a TRTOption in the input to getOptions. As such, if the +//! definition only has shortName set, it will never be equal to a TRTOption +//! that does not have shortName set (and same for longName). +//! +//! If A and B both have shortName set but B does not have longName set, A +//! matches B if and only if A.shortName == B.shortName. +//! +//! If A has neither long or short name set, A matches B if and only if B has +//! neither long or short name set. 
+bool matches(const TRTOption& a, const TRTOption& b) {
+  if (!a.longName.empty() && !b.longName.empty()) {
+    if (a.shortName && b.shortName) {
+      return (a.longName == b.longName) && (a.shortName == b.shortName);
+    }
+    return a.longName == b.longName;
+  }
+
+  // If only one of them is not set, this will return false anyway.
+  return a.shortName == b.shortName;
+}
+
+//! getTRTOptionIndex returns the index of a TRTOption in a vector of
+//! TRTOptions, -1 if not found.
+int getTRTOptionIndex(const std::vector<TRTOption>& options,
+                      const TRTOption& opt) {
+  for (size_t i = 0; i < options.size(); ++i) {
+    if (matches(opt, options[i])) {
+      return i;
+    }
+  }
+  return -1;
+}
+
+//! validateTRTOption will return a string containing an error message if the
+//! option's names contain non-alphanumeric characters, or if its names
+//! duplicate ones that were already seen. Otherwise, it returns the empty
+//! string.
+std::string validateTRTOption(const std::set<char>& seenShortNames,
+                              const std::set<std::string>& seenLongNames,
+                              const TRTOption& opt) {
+  if (opt.shortName != 0) {
+    if (!std::isalnum(opt.shortName)) {
+      return "Short name '" + std::to_string(opt.shortName) +
+             "' is non-alphanumeric";
+    }
+
+    if (seenShortNames.find(opt.shortName) != seenShortNames.end()) {
+      return "Short name '" + std::to_string(opt.shortName) +
+             "' is a duplicate";
+    }
+  }
+
+  if (!opt.longName.empty()) {
+    for (const char& c : opt.longName) {
+      if (!std::isalnum(c) && c != '-' && c != '_') {
+        return "Long name '" + opt.longName +
+               "' contains characters that are not '-', '_', or alphanumeric";
+      }
+    }
+
+    if (seenLongNames.find(opt.longName) != seenLongNames.end()) {
+      return "Long name '" + opt.longName + "' is a duplicate";
+    }
+  }
+  return "";
+}
+
+//! validateTRTOptions will return a string containing an error message if any
+//! option's names contain non-alphanumeric characters, or if there are
+//! duplicate option names found. Otherwise, it returns the empty string.
+std::string validateTRTOptions(const std::vector<TRTOption>& options) {
+  std::set<char> seenShortNames;
+  std::set<std::string> seenLongNames;
+  for (size_t i = 0; i < options.size(); ++i) {
+    const std::string errMsg =
+        validateTRTOption(seenShortNames, seenLongNames, options[i]);
+    if (!errMsg.empty()) {
+      return "Error '" + errMsg + "' at TRTOption " + std::to_string(i);
+    }
+
+    seenShortNames.insert(options[i].shortName);
+    seenLongNames.insert(options[i].longName);
+  }
+  return "";
+}
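+
+// Illustrative sketch of how the pieces below fit together (this option set
+// is hypothetical, not part of this file): parsing "./app --model net.onnx -v"
+// with getOptions() could look like
+//
+//   std::vector<TRTOption> opts = {
+//       {'m', "model", true, "Path to the model"},
+//       {'v', "verbose", false, "Enable verbose logging"}};
+//   TRTParsedArgs parsed = getOptions(argc, argv, opts);
+//   if (!parsed.errMsg.empty()) { /* report error and exit */ }
+//   if (parsed.values[0].first > 0) {
+//     const std::string& model = parsed.values[0].second[0];  // "net.onnx"
+//   }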
+
+//! parseArgs parses an argument list and returns a TRTParsedArgs with the
+//! fields set accordingly. Assumes that options is validated.
+//! errMsg will be set if:
+//!     - an argument is null
+//!     - an argument is empty
+//!     - an argument does not have option (i.e. "-" and "--")
+//!     - a short argument has more than 1 character
+//!     - the last argument in the list requires a value
+TRTParsedArgs parseArgs(int argc, const char* const* argv,
+                        const std::vector<TRTOption>& options) {
+  TRTParsedArgs parsedArgs;
+  parsedArgs.values.resize(options.size());
+
+  for (int i = 1; i < argc; ++i)  // index of current command-line argument
+  {
+    if (argv[i] == nullptr) {
+      return TRTParsedArgs{"Null argument at index " + std::to_string(i)};
+    }
+
+    const std::string argStr(argv[i]);
+    if (argStr.empty()) {
+      return TRTParsedArgs{"Empty argument at index " + std::to_string(i)};
+    }
+
+    // No starting hyphen means it is a positional argument
+    if (argStr[0] != '-') {
+      parsedArgs.positionalArgs.push_back(argStr);
+      continue;
+    }
+
+    if (argStr == "-" || argStr == "--") {
+      return TRTParsedArgs{"Argument does not specify an option at index " +
+                           std::to_string(i)};
+    }
+
+    // If only 1 hyphen, char after is the flag.
+    TRTOption opt{' ', "", false, ""};
+    std::string value;
+    if (argStr[1] != '-') {
+      // Must only have 1 char after the hyphen
+      if (argStr.size() > 2) {
+        return TRTParsedArgs{
+            "Short arg contains more than 1 character at index " +
+            std::to_string(i)};
+      }
+      opt.shortName = argStr[1];
+    } else {
+      opt.longName = argStr.substr(2);
+
+      // We need to support --foo=bar syntax, so look for '='
+      const size_t eqIndex = opt.longName.find('=');
+      if (eqIndex < opt.longName.size()) {
+        value = opt.longName.substr(eqIndex + 1);
+        opt.longName = opt.longName.substr(0, eqIndex);
+      }
+    }
+
+    const int idx = getTRTOptionIndex(options, opt);
+    if (idx < 0) {
+      continue;
+    }
+
+    if (options[idx].valueRequired) {
+      if (!value.empty()) {
+        parsedArgs.values[idx].second.push_back(value);
+        parsedArgs.values[idx].first = parsedArgs.values[idx].second.size();
+        continue;
+      }
+
+      if (i + 1 >= argc) {
+        return TRTParsedArgs{"Last argument requires value, but none given"};
+      }
+
+      const std::string nextArg(argv[i + 1]);
+      if (nextArg.size() >= 1 && nextArg[0] == '-') {
+        sample::gLogWarning << "Warning: Using '" << nextArg
+                            << "' as a value for '" << argStr
+                            << "'. Should this be its own flag?" << std::endl;
+      }
+
+      parsedArgs.values[idx].second.push_back(nextArg);
+      i += 1;  // Next argument already consumed
+
+      parsedArgs.values[idx].first = parsedArgs.values[idx].second.size();
+    } else {
+      parsedArgs.values[idx].first += 1;
+    }
+  }
+  return parsedArgs;
+}
+
+TRTParsedArgs getOptions(int argc, const char* const* argv,
+                         const std::vector<TRTOption>& options) {
+  const std::string errMsg = validateTRTOptions(options);
+  if (!errMsg.empty()) {
+    return TRTParsedArgs{errMsg};
+  }
+  return parseArgs(argc, argv, options);
+}
+} // namespace utility
+} // namespace nvinfer1
diff --git a/fastdeploy/backends/tensorrt/common/getOptions.h b/fastdeploy/backends/tensorrt/common/getOptions.h
new file mode 100644
index 0000000000..efe466632f
--- /dev/null
+++ b/fastdeploy/backends/tensorrt/common/getOptions.h
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TRT_GET_OPTIONS_H
+#define TRT_GET_OPTIONS_H
+
+#include <string>
+#include <utility>
+#include <vector>
+
+namespace nvinfer1 {
+namespace utility {
+
+//! TRTOption defines a command line option. At least 1 of shortName and
+//! longName must be defined.
+//! If bool initialization is undefined behavior on your system, valueRequired
+//! must also be explicitly defined.
+//! helpText is optional.
+struct TRTOption {
+  char shortName;  //!< Option name in short (single hyphen) form (i.e. -a, -b)
+  std::string longName;  //!< Option name in long (double hyphen) form (i.e.
+                         //!< --foo, --bar)
+  bool valueRequired;  //!< True if a value is needed for an option (i.e. -N 4,
+                       //!< --foo bar)
+  std::string helpText;  //!< Text to show when printing out the command usage
+};
+
+//! TRTParsedArgs is returned by getOptions after it has parsed a command line
+//! argument list (argv).
+//!
+//! errMsg is a string containing an error message if any errors occurred. If
+//! it is empty, no errors occurred.
+//!
+//! values stores a vector of pairs for each option (ordered by order in the
+//! input). Each pair contains an int (the number of occurrences) and a vector
+//! of strings (a list of values). The user should know which of these to use,
+//! and which options required values. For non-value options, only occurrences
+//! is populated. For value-required options, occurrences == # of values.
+//! Values do not need to be unique.
+//!
+//! positionalArgs stores additional arguments that are passed in without an
+//! option (these must not start with a hyphen).
+struct TRTParsedArgs {
+  std::string errMsg;
+  std::vector<std::pair<int, std::vector<std::string>>> values;
+  std::vector<std::string> positionalArgs;
+};
+
+//! Parse the input arguments passed to main() and extract options as well as
+//! positional arguments.
+//!
+//! Options are supposed to be passed to main() with a preceding hyphen '-'.
+//!
+//! If there is a single preceding hyphen, there should be exactly 1 character
+//! after the hyphen, which is interpreted as the option.
+//!
+//! If there are 2 preceding hyphens, the entire argument (without the hyphens)
+//! is interpreted as the option.
+//!
+//! If the option requires a value, the next argument is used as the value.
+//!
+//! Positional arguments must not start with a hyphen.
+//!
+//! If an argument requires a value, the next argument is interpreted as the
+//! value, even if it is the form of a valid option (i.e. --foo --bar will
+//! store "--bar" as a value for option "foo" if "foo" requires a value).
+//! We also support --name=value syntax. In this case, 'value' would be used
+//! as the value, NOT the next argument.
+//!
+//! For options:
+//! { { 'a', "", false },
+//!   { 'b', "", false },
+//!   { 0, "cee", false },
+//!   { 'd', "", true },
+//!   { 'e', "", true },
+//!   { 'f', "foo", true } }
+//!
+//! ./main hello world -a -a --cee -d 12 -f 34
+//! and
+//! ./main hello world -a -a --cee -d 12 --foo 34
+//!
+//! will result in:
+//!
+//! TRTParsedArgs {
+//!      errMsg: "",
+//!      values: { { 2, {} },
+//!                { 0, {} },
+//!                { 1, {} },
+//!                { 1, {"12"} },
+//!                { 0, {} },
+//!                { 1, {"34"} } }
+//!      positionalArgs: {"hello", "world"},
+//! }
+//!
+//! Non-POSIX behavior:
+//!     - Does not support "-abcde" as a shorthand for "-a -b -c -d -e". Each
+//!       option must have its own hyphen prefix.
+//!     - Does not support -e12 as a shorthand for "-e 12". Values MUST be
+//!       whitespace-separated from the option it is for.
+//!
+//! @param[in] argc The number of arguments passed to main (including the
+//!                 file name, which is disregarded)
+//!
@param[in] argv The arguments passed to main (including the file name, +//! which is disregarded) +//! @param[in] options List of TRTOptions to parse +//! @return TRTParsedArgs. See TRTParsedArgs documentation for descriptions of +//! the fields. +TRTParsedArgs getOptions(int argc, const char* const* argv, + const std::vector& options); +} // namespace utility +} // namespace nvinfer1 + +#endif // TRT_GET_OPTIONS_H diff --git a/fastdeploy/backends/tensorrt/common/half.h b/fastdeploy/backends/tensorrt/common/half.h new file mode 100644 index 0000000000..5ca7970005 --- /dev/null +++ b/fastdeploy/backends/tensorrt/common/half.h @@ -0,0 +1,3787 @@ +// half - IEEE 754-based half-precision floating point library. +// +// Copyright (c) 2012-2017 Christian Rau +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated +// documentation files (the "Software"), to deal in the Software without +// restriction, including without limitation the +// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +// sell copies of the Software, and to +// permit persons to whom the Software is furnished to do so, subject to the +// following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the +// Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE +// WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR +// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. + +/* + * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Version 1.12.0 + +/// \file +/// Main header file for half precision functionality. + +#ifndef HALF_HALF_HPP +#define HALF_HALF_HPP + +/// Combined gcc version number. 
+#define HALF_GNUC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) + +// check C++11 language features +#if defined(__clang__) // clang +#if __has_feature(cxx_static_assert) && \ + !defined(HALF_ENABLE_CPP11_STATIC_ASSERT) +#define HALF_ENABLE_CPP11_STATIC_ASSERT 1 +#endif +#if __has_feature(cxx_constexpr) && !defined(HALF_ENABLE_CPP11_CONSTEXPR) +#define HALF_ENABLE_CPP11_CONSTEXPR 1 +#endif +#if __has_feature(cxx_noexcept) && !defined(HALF_ENABLE_CPP11_NOEXCEPT) +#define HALF_ENABLE_CPP11_NOEXCEPT 1 +#endif +#if __has_feature(cxx_user_literals) && \ + !defined(HALF_ENABLE_CPP11_USER_LITERALS) +#define HALF_ENABLE_CPP11_USER_LITERALS 1 +#endif +#if (defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L) && \ + !defined(HALF_ENABLE_CPP11_LONG_LONG) +#define HALF_ENABLE_CPP11_LONG_LONG 1 +#endif +/*#elif defined(__INTEL_COMPILER) + //Intel C++ + #if __INTEL_COMPILER >= 1100 && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT) + ???????? + #define HALF_ENABLE_CPP11_STATIC_ASSERT 1 + #endif + #if __INTEL_COMPILER >= 1300 && !defined(HALF_ENABLE_CPP11_CONSTEXPR) + ???????? + #define HALF_ENABLE_CPP11_CONSTEXPR 1 + #endif + #if __INTEL_COMPILER >= 1300 && !defined(HALF_ENABLE_CPP11_NOEXCEPT) + ???????? + #define HALF_ENABLE_CPP11_NOEXCEPT 1 + #endif + #if __INTEL_COMPILER >= 1100 && !defined(HALF_ENABLE_CPP11_LONG_LONG) + ???????? + #define HALF_ENABLE_CPP11_LONG_LONG 1 + #endif*/ +#elif defined(__GNUC__) // gcc +#if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L +#if HALF_GNUC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT) +#define HALF_ENABLE_CPP11_STATIC_ASSERT 1 +#endif +#if HALF_GNUC_VERSION >= 406 && !defined(HALF_ENABLE_CPP11_CONSTEXPR) +#define HALF_ENABLE_CPP11_CONSTEXPR 1 +#endif +#if HALF_GNUC_VERSION >= 406 && !defined(HALF_ENABLE_CPP11_NOEXCEPT) +#define HALF_ENABLE_CPP11_NOEXCEPT 1 +#endif +#if HALF_GNUC_VERSION >= 407 && !defined(HALF_ENABLE_CPP11_USER_LITERALS) +#define HALF_ENABLE_CPP11_USER_LITERALS 1 +#endif +#if !defined(HALF_ENABLE_CPP11_LONG_LONG) +#define HALF_ENABLE_CPP11_LONG_LONG 1 +#endif +#endif +#elif defined(_MSC_VER) // Visual C++ +#if _MSC_VER >= 1900 && !defined(HALF_ENABLE_CPP11_CONSTEXPR) +#define HALF_ENABLE_CPP11_CONSTEXPR 1 +#endif +#if _MSC_VER >= 1900 && !defined(HALF_ENABLE_CPP11_NOEXCEPT) +#define HALF_ENABLE_CPP11_NOEXCEPT 1 +#endif +#if _MSC_VER >= 1900 && !defined(HALF_ENABLE_CPP11_USER_LITERALS) +#define HALF_ENABLE_CPP11_USER_LITERALS 1 +#endif +#if _MSC_VER >= 1600 && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT) +#define HALF_ENABLE_CPP11_STATIC_ASSERT 1 +#endif +#if _MSC_VER >= 1310 && !defined(HALF_ENABLE_CPP11_LONG_LONG) +#define HALF_ENABLE_CPP11_LONG_LONG 1 +#endif +#define HALF_POP_WARNINGS 1 +#pragma warning(push) +#pragma warning(disable : 4099 4127 4146) // struct vs class, constant in if, +// negative unsigned +#endif + +// check C++11 library features +#include +#if defined(_LIBCPP_VERSION) // libc++ +#if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103 +#ifndef HALF_ENABLE_CPP11_TYPE_TRAITS +#define HALF_ENABLE_CPP11_TYPE_TRAITS 1 +#endif +#ifndef HALF_ENABLE_CPP11_CSTDINT +#define HALF_ENABLE_CPP11_CSTDINT 1 +#endif +#ifndef HALF_ENABLE_CPP11_CMATH +#define HALF_ENABLE_CPP11_CMATH 1 +#endif +#ifndef HALF_ENABLE_CPP11_HASH +#define HALF_ENABLE_CPP11_HASH 1 +#endif +#endif +#elif defined(__GLIBCXX__) // libstdc++ +#if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103 +#ifdef __clang__ +#if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_TYPE_TRAITS) +#define HALF_ENABLE_CPP11_TYPE_TRAITS 1 
+#endif +#if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_CSTDINT) +#define HALF_ENABLE_CPP11_CSTDINT 1 +#endif +#if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_CMATH) +#define HALF_ENABLE_CPP11_CMATH 1 +#endif +#if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_HASH) +#define HALF_ENABLE_CPP11_HASH 1 +#endif +#else +#if HALF_GNUC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_CSTDINT) +#define HALF_ENABLE_CPP11_CSTDINT 1 +#endif +#if HALF_GNUC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_CMATH) +#define HALF_ENABLE_CPP11_CMATH 1 +#endif +#if HALF_GNUC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_HASH) +#define HALF_ENABLE_CPP11_HASH 1 +#endif +#endif +#endif +#elif defined(_CPPLIB_VER) // Dinkumware/Visual C++ +#if _CPPLIB_VER >= 520 +#ifndef HALF_ENABLE_CPP11_TYPE_TRAITS +#define HALF_ENABLE_CPP11_TYPE_TRAITS 1 +#endif +#ifndef HALF_ENABLE_CPP11_CSTDINT +#define HALF_ENABLE_CPP11_CSTDINT 1 +#endif +#ifndef HALF_ENABLE_CPP11_HASH +#define HALF_ENABLE_CPP11_HASH 1 +#endif +#endif +#if _CPPLIB_VER >= 610 +#ifndef HALF_ENABLE_CPP11_CMATH +#define HALF_ENABLE_CPP11_CMATH 1 +#endif +#endif +#endif +#undef HALF_GNUC_VERSION + +// support constexpr +#if HALF_ENABLE_CPP11_CONSTEXPR +#define HALF_CONSTEXPR constexpr +#define HALF_CONSTEXPR_CONST constexpr +#else +#define HALF_CONSTEXPR +#define HALF_CONSTEXPR_CONST const +#endif + +// support noexcept +#if HALF_ENABLE_CPP11_NOEXCEPT +#define HALF_NOEXCEPT noexcept +#define HALF_NOTHROW noexcept +#else +#define HALF_NOEXCEPT +#define HALF_NOTHROW throw() +#endif + +#include +#include +#include +#include +#include +#include +#if HALF_ENABLE_CPP11_TYPE_TRAITS +#include +#endif +#if HALF_ENABLE_CPP11_CSTDINT +#include +#endif +#if HALF_ENABLE_CPP11_HASH +#include +#endif + +/// Default rounding mode. +/// This specifies the rounding mode used for all conversions between +/// [half](\ref half_float::half)s and `float`s as +/// well as for the half_cast() if not specifying a rounding mode explicitly. It +/// can be redefined (before including +/// half.hpp) to one of the standard rounding modes using their respective +/// constants or the equivalent values of +/// `std::float_round_style`: +/// +/// `std::float_round_style` | value | rounding +/// ---------------------------------|-------|------------------------- +/// `std::round_indeterminate` | -1 | fastest (default) +/// `std::round_toward_zero` | 0 | toward zero +/// `std::round_to_nearest` | 1 | to nearest +/// `std::round_toward_infinity` | 2 | toward positive infinity +/// `std::round_toward_neg_infinity` | 3 | toward negative infinity +/// +/// By default this is set to `-1` (`std::round_indeterminate`), which uses +/// truncation (round toward zero, but with +/// overflows set to infinity) and is the fastest rounding mode possible. It can +/// even be set to +/// `std::numeric_limits::round_style` to synchronize the rounding mode +/// with that of the underlying +/// single-precision implementation. +#ifndef HALF_ROUND_STYLE +#define HALF_ROUND_STYLE 1 // = std::round_to_nearest +#endif + +/// Tie-breaking behaviour for round to nearest. +/// This specifies if ties in round to nearest should be resolved by rounding to +/// the nearest even value. By default this +/// is defined to `0` resulting in the faster but slightly more biased behaviour +/// of rounding away from zero in half-way +/// cases (and thus equal to the round() function), but can be redefined to `1` +/// (before including half.hpp) if more +/// IEEE-conformant behaviour is needed. 
+#ifndef HALF_ROUND_TIES_TO_EVEN +#define HALF_ROUND_TIES_TO_EVEN 0 // ties away from zero +#endif + +/// Value signaling overflow. +/// In correspondence with `HUGE_VAL[F|L]` from `` this symbol expands to +/// a positive value signaling the overflow +/// of an operation, in particular it just evaluates to positive infinity. +#define HUGE_VALH std::numeric_limits::infinity() + +/// Fast half-precision fma function. +/// This symbol is only defined if the fma() function generally executes as fast +/// as, or faster than, a separate +/// half-precision multiplication followed by an addition. Due to the internal +/// single-precision implementation of all +/// arithmetic operations, this is in fact always the case. +#define FP_FAST_FMAH 1 + +#ifndef FP_ILOGB0 +#define FP_ILOGB0 INT_MIN +#endif +#ifndef FP_ILOGBNAN +#define FP_ILOGBNAN INT_MAX +#endif +#ifndef FP_SUBNORMAL +#define FP_SUBNORMAL 0 +#endif +#ifndef FP_ZERO +#define FP_ZERO 1 +#endif +#ifndef FP_NAN +#define FP_NAN 2 +#endif +#ifndef FP_INFINITE +#define FP_INFINITE 3 +#endif +#ifndef FP_NORMAL +#define FP_NORMAL 4 +#endif + +/// Main namespace for half precision functionality. +/// This namespace contains all the functionality provided by the library. +namespace half_float { +class half; + +#if HALF_ENABLE_CPP11_USER_LITERALS +/// Library-defined half-precision literals. +/// Import this namespace to enable half-precision floating point literals: +/// ~~~~{.cpp} +/// using namespace half_float::literal; +/// half_float::half = 4.2_h; +/// ~~~~ +namespace literal { +half operator"" _h(long double); +} +#endif + +/// \internal +/// \brief Implementation details. +namespace detail { +#if HALF_ENABLE_CPP11_TYPE_TRAITS +/// Conditional type. +template +struct conditional : std::conditional {}; + +/// Helper for tag dispatching. +template struct bool_type : std::integral_constant {}; +using std::false_type; +using std::true_type; + +/// Type traits for floating point types. +template struct is_float : std::is_floating_point {}; +#else +/// Conditional type. +template struct conditional { typedef T type; }; +template struct conditional { + typedef F type; +}; + +/// Helper for tag dispatching. +template struct bool_type {}; +typedef bool_type true_type; +typedef bool_type false_type; + +/// Type traits for floating point types. +template struct is_float : false_type {}; +template struct is_float : is_float {}; +template struct is_float : is_float {}; +template struct is_float : is_float {}; +template <> struct is_float : true_type {}; +template <> struct is_float : true_type {}; +template <> struct is_float : true_type {}; +#endif + +/// Type traits for floating point bits. +template struct bits { typedef unsigned char type; }; +template struct bits : bits {}; +template struct bits : bits {}; +template struct bits : bits {}; + +#if HALF_ENABLE_CPP11_CSTDINT +/// Unsigned integer of (at least) 16 bits width. +typedef std::uint_least16_t uint16; + +/// Unsigned integer of (at least) 32 bits width. +template <> struct bits { typedef std::uint_least32_t type; }; + +/// Unsigned integer of (at least) 64 bits width. +template <> struct bits { typedef std::uint_least64_t type; }; +#else +/// Unsigned integer of (at least) 16 bits width. +typedef unsigned short uint16; + +/// Unsigned integer of (at least) 32 bits width. +template <> +struct bits + : conditional::digits >= 32, unsigned int, + unsigned long> {}; + +#if HALF_ENABLE_CPP11_LONG_LONG +/// Unsigned integer of (at least) 64 bits width. 
+template <> +struct bits + : conditional::digits >= 64, + unsigned long, unsigned long long> {}; +#else +/// Unsigned integer of (at least) 64 bits width. +template <> struct bits { typedef unsigned long type; }; +#endif +#endif + +/// Tag type for binary construction. +struct binary_t {}; + +/// Tag for binary construction. +HALF_CONSTEXPR_CONST binary_t binary = binary_t(); + +/// Temporary half-precision expression. +/// This class represents a half-precision expression which just stores a +/// single-precision value internally. +struct expr { + /// Conversion constructor. + /// \param f single-precision value to convert + explicit HALF_CONSTEXPR expr(float f) HALF_NOEXCEPT : value_(f) {} + + /// Conversion to single-precision. + /// \return single precision value representing expression value + HALF_CONSTEXPR operator float() const HALF_NOEXCEPT { return value_; } + + private: + /// Internal expression value stored in single-precision. + float value_; +}; + +/// SFINAE helper for generic half-precision functions. +/// This class template has to be specialized for each valid combination of +/// argument types to provide a corresponding +/// `type` member equivalent to \a T. +/// \tparam T type to return +template +struct enable {}; +template struct enable { typedef T type; }; +template struct enable { typedef T type; }; +template struct enable { typedef T type; }; +template struct enable { typedef T type; }; +template struct enable { typedef T type; }; +template struct enable { typedef T type; }; +template struct enable { typedef T type; }; +template struct enable { typedef T type; }; +template struct enable { typedef T type; }; +template struct enable { typedef T type; }; +template struct enable { typedef T type; }; +template struct enable { typedef T type; }; +template struct enable { typedef T type; }; +template struct enable { typedef T type; }; + +/// Return type for specialized generic 2-argument half-precision functions. +/// This class template has to be specialized for each valid combination of +/// argument types to provide a corresponding +/// `type` member denoting the appropriate return type. +/// \tparam T first argument type +/// \tparam U first argument type +template struct result : enable {}; +template <> struct result { typedef half type; }; + +/// \name Classification helpers +/// \{ + +/// Check for infinity. +/// \tparam T argument type (builtin floating point type) +/// \param arg value to query +/// \retval true if infinity +/// \retval false else +template bool builtin_isinf(T arg) { +#if HALF_ENABLE_CPP11_CMATH + return std::isinf(arg); +#elif defined(_MSC_VER) + return !::_finite(static_cast(arg)) && + !::_isnan(static_cast(arg)); +#else + return arg == std::numeric_limits::infinity() || + arg == -std::numeric_limits::infinity(); +#endif +} + +/// Check for NaN. +/// \tparam T argument type (builtin floating point type) +/// \param arg value to query +/// \retval true if not a number +/// \retval false else +template bool builtin_isnan(T arg) { +#if HALF_ENABLE_CPP11_CMATH + return std::isnan(arg); +#elif defined(_MSC_VER) + return ::_isnan(static_cast(arg)) != 0; +#else + return arg != arg; +#endif +} + +/// Check sign. 
+/// \tparam T argument type (builtin floating point type) +/// \param arg value to query +/// \retval true if signbit set +/// \retval false else +template bool builtin_signbit(T arg) { +#if HALF_ENABLE_CPP11_CMATH + return std::signbit(arg); +#else + return arg < T() || (arg == T() && T(1) / arg < T()); +#endif +} + +/// \} +/// \name Conversion +/// \{ + +/// Convert IEEE single-precision to half-precision. +/// Credit for this goes to [Jeroen van der +/// Zijp](ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf). +/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest +/// rounding +/// \param value single-precision value +/// \return binary representation of half-precision value +template +uint16 float2half_impl(float value, true_type) { + typedef bits::type uint32; + uint32 bits; // = *reinterpret_cast(&value); + // //violating + // strict aliasing! + std::memcpy(&bits, &value, sizeof(float)); + /* uint16 hbits = (bits>>16) & 0x8000; + bits &= 0x7FFFFFFF; + int exp = bits >> 23; + if(exp == 255) + return hbits | 0x7C00 | + (0x3FF&-static_cast((bits&0x7FFFFF)!=0)); + if(exp > 142) + { + if(R == std::round_toward_infinity) + return hbits | 0x7C00 - (hbits>>15); + if(R == std::round_toward_neg_infinity) + return hbits | 0x7BFF + (hbits>>15); + return hbits | 0x7BFF + (R!=std::round_toward_zero); + } + int g, s; + if(exp > 112) + { + g = (bits>>12) & 1; + s = (bits&0xFFF) != 0; + hbits |= ((exp-112)<<10) | ((bits>>13)&0x3FF); + } + else if(exp > 101) + { + int i = 125 - exp; + bits = (bits&0x7FFFFF) | 0x800000; + g = (bits>>i) & 1; + s = (bits&((1L<> (i+1); + } + else + { + g = 0; + s = bits != 0; + } + if(R == std::round_to_nearest) + #if HALF_ROUND_TIES_TO_EVEN + hbits += g & (s|hbits); + #else + hbits += g; + #endif + else if(R == std::round_toward_infinity) + hbits += ~(hbits>>15) & (s|g); + else if(R == std::round_toward_neg_infinity) + hbits += (hbits>>15) & (g|s); + */ + static const uint16 base_table[512] = { + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, + 0x0020, 0x0040, 0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x0C00, 0x1000, + 0x1400, 0x1800, 0x1C00, 0x2000, 0x2400, 0x2800, 0x2C00, 0x3000, 0x3400, + 0x3800, 0x3C00, 0x4000, 0x4400, 0x4800, 0x4C00, 0x5000, 0x5400, 0x5800, + 0x5C00, 0x6000, 0x6400, 0x6800, 0x6C00, 0x7000, 0x7400, 0x7800, 0x7C00, + 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, + 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, + 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, + 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, + 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, + 0x7C00, 0x7C00, 
0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, + 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, + 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, + 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, + 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, + 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, + 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, + 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001, + 0x8002, 0x8004, 0x8008, 0x8010, 0x8020, 0x8040, 0x8080, 0x8100, 0x8200, + 0x8400, 0x8800, 0x8C00, 0x9000, 0x9400, 0x9800, 0x9C00, 0xA000, 0xA400, + 0xA800, 0xAC00, 0xB000, 0xB400, 0xB800, 0xBC00, 0xC000, 0xC400, 0xC800, + 0xCC00, 0xD000, 0xD400, 0xD800, 0xDC00, 0xE000, 0xE400, 0xE800, 0xEC00, + 0xF000, 0xF400, 0xF800, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, + 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, + 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, + 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, + 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, + 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, + 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, + 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, + 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, + 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, + 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, + 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, + 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00}; + static const unsigned char shift_table[512] = { + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 23, 22, 21, 20, 19, + 18, 17, 16, 15, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 
24, 24, 24, 24, 24, 24,
+      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+      24, 24, 24, 13, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 23,
+      22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+      13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+      13, 13, 13, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 13};
+  uint16 hbits =
+      base_table[bits >> 23] +
+      static_cast<uint16>((bits & 0x7FFFFF) >> shift_table[bits >> 23]);
+  if (R == std::round_to_nearest)
+    hbits +=
+        (((bits & 0x7FFFFF) >> (shift_table[bits >> 23] - 1)) |
+         (((bits >> 23) & 0xFF) == 102)) &
+        ((hbits & 0x7C00) != 0x7C00)
+#if HALF_ROUND_TIES_TO_EVEN
+        & (((((static_cast<uint32>(1) << (shift_table[bits >> 23] - 1)) - 1) &
+             bits) != 0) |
+           hbits)
+#endif
+        ;
+  else if (R == std::round_toward_zero)
+    hbits -= ((hbits & 0x7FFF) == 0x7C00) & ~shift_table[bits >> 23];
+  else if (R == std::round_toward_infinity)
+    hbits +=
+        ((((bits & 0x7FFFFF &
+            ((static_cast<uint32>(1) << (shift_table[bits >> 23])) - 1)) != 0) |
+          (((bits >> 23) <= 102) & ((bits >> 23) != 0))) &
+         (hbits < 0x7C00)) -
+        ((hbits == 0xFC00) & ((bits >> 23) != 511));
+  else if (R == std::round_toward_neg_infinity)
+    hbits +=
+        ((((bits & 0x7FFFFF &
+            ((static_cast<uint32>(1) << (shift_table[bits >> 23])) - 1)) != 0) |
+          (((bits >> 23) <= 358) & ((bits >> 23) != 256))) &
+         (hbits < 0xFC00) & (hbits >> 15)) -
+        ((hbits == 0x7C00) & ((bits >> 23) != 255));
+  return hbits;
+}
+
+/// Convert IEEE double-precision to half-precision.
+/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest
+/// rounding
+/// \param value double-precision value
+/// \return binary representation of half-precision value
+template <std::float_round_style R>
+uint16 float2half_impl(double value, true_type) {
+  typedef bits<float>::type uint32;
+  typedef bits<double>::type uint64;
+  uint64 bits;  // = *reinterpret_cast<uint64*>(&value);
+                // violating strict aliasing!
+  std::memcpy(&bits, &value, sizeof(double));
+  uint32 hi = bits >> 32, lo = bits & 0xFFFFFFFF;
+  uint16 hbits = (hi >> 16) & 0x8000;
+  hi &= 0x7FFFFFFF;
+  int exp = hi >> 20;
+  if (exp == 2047)
+    return hbits | 0x7C00 |
+           (0x3FF & -static_cast<uint16>((bits & 0xFFFFFFFFFFFFF) != 0));
+  if (exp > 1038) {
+    if (R == std::round_toward_infinity)
+      return hbits | 0x7C00 - (hbits >> 15);
+    if (R == std::round_toward_neg_infinity)
+      return hbits | 0x7BFF + (hbits >> 15);
+    return hbits | 0x7BFF + (R != std::round_toward_zero);
+  }
+  int g, s = lo != 0;
+  if (exp > 1008) {
+    g = (hi >> 9) & 1;
+    s |= (hi & 0x1FF) != 0;
+    hbits |= ((exp - 1008) << 10) | ((hi >> 10) & 0x3FF);
+  } else if (exp > 997) {
+    int i = 1018 - exp;
+    hi = (hi & 0xFFFFF) | 0x100000;
+    g = (hi >> i) & 1;
+    s |= (hi & ((1L << i) - 1)) != 0;
+    hbits |= hi >> (i + 1);
+  } else {
+    g = 0;
+    s |= hi != 0;
+  }
+  if (R == std::round_to_nearest)
+#if HALF_ROUND_TIES_TO_EVEN
+    hbits += g & (s | hbits);
+#else
+    hbits += g;
+#endif
+  else if (R == std::round_toward_infinity)
+    hbits += ~(hbits >> 15) & (s | g);
+  else if (R == std::round_toward_neg_infinity)
+    hbits += (hbits >> 15) & (g | s);
+  return hbits;
+}
+
+/// Convert non-IEEE floating point to half-precision.
+/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest
+/// rounding
+/// \tparam T source type (builtin floating point type)
+/// \param value floating point value
+/// \return binary representation of half-precision value
+template <std::float_round_style R, typename T>
+uint16 float2half_impl(T value, ...) {
+  uint16 hbits = static_cast<uint16>(builtin_signbit(value)) << 15;
+  if (value == T())
+    return hbits;
+  if (builtin_isnan(value))
+    return hbits | 0x7FFF;
+  if (builtin_isinf(value))
+    return hbits | 0x7C00;
+  int exp;
+  std::frexp(value, &exp);
+  if (exp > 16) {
+    if (R == std::round_toward_infinity)
+      return hbits | (0x7C00 - (hbits >> 15));
+    else if (R == std::round_toward_neg_infinity)
+      return hbits | (0x7BFF + (hbits >> 15));
+    return hbits | (0x7BFF + (R != std::round_toward_zero));
+  }
+  if (exp < -13)
+    value = std::ldexp(value, 24);
+  else {
+    value = std::ldexp(value, 11 - exp);
+    hbits |= ((exp + 13) << 10);
+  }
+  T ival, frac = std::modf(value, &ival);
+  hbits += static_cast<uint16>(std::abs(static_cast<int>(ival)));
+  if (R == std::round_to_nearest) {
+    frac = std::abs(frac);
+#if HALF_ROUND_TIES_TO_EVEN
+    hbits += (frac > T(0.5)) | ((frac == T(0.5)) & hbits);
+#else
+    hbits += frac >= T(0.5);
+#endif
+  } else if (R == std::round_toward_infinity)
+    hbits += frac > T();
+  else if (R == std::round_toward_neg_infinity)
+    hbits += frac < T();
+  return hbits;
+}
+
+/// Convert floating point to half-precision.
+/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest
+/// rounding
+/// \tparam T source type (builtin floating point type)
+/// \param value floating point value
+/// \return binary representation of half-precision value
+template <std::float_round_style R, typename T> uint16 float2half(T value) {
+  return float2half_impl<R>(
+      value, bool_type<std::numeric_limits<T>::is_iec559 &&
+                       sizeof(typename bits<T>::type) == sizeof(T)>());
+}
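+// [Editor's note] Illustrative round-trip sketch, not part of the original
+// header. On an IEC 559 platform float2half dispatches to the table-based
+// float overload above; 1.5f (0x3FC00000) maps to the half pattern 0x3E00:
+//
+//   uint16 h = float2half<std::round_to_nearest>(1.5f);
+//   // h == 0x3E00: sign 0, biased exponent 15, mantissa 0x200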
+/// Convert integer to half-precision floating point.
+/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest
+/// rounding
+/// \tparam S `true` if value negative, `false` else
+/// \tparam T type to convert (builtin integer type)
+/// \param value non-negative integral value
+/// \return binary representation of half-precision value
+template <std::float_round_style R, bool S, typename T>
+uint16 int2half_impl(T value) {
+#if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS
+  static_assert(std::is_integral<T>::value,
+                "int to half conversion only supports builtin integer types");
+#endif
+  if (S)
+    value = -value;
+  uint16 bits = S << 15;
+  if (value > 0xFFFF) {
+    if (R == std::round_toward_infinity)
+      bits |= 0x7C00 - S;
+    else if (R == std::round_toward_neg_infinity)
+      bits |= 0x7BFF + S;
+    else
+      bits |= 0x7BFF + (R != std::round_toward_zero);
+  } else if (value) {
+    uint32_t m = value, exp = 24;
+    for (; m < 0x400; m <<= 1, --exp)
+      ;
+    for (; m > 0x7FF; m >>= 1, ++exp)
+      ;
+    bits |= (exp << 10) + m;
+    if (exp > 24) {
+      if (R == std::round_to_nearest)
+        bits += (value >> (exp - 25)) & 1
+#if HALF_ROUND_TIES_TO_EVEN
+                & (((((1 << (exp - 25)) - 1) & value) != 0) | bits)
+#endif
+            ;
+      else if (R == std::round_toward_infinity)
+        bits += ((value & ((1 << (exp - 24)) - 1)) != 0) & !S;
+      else if (R == std::round_toward_neg_infinity)
+        bits += ((value & ((1 << (exp - 24)) - 1)) != 0) & S;
+    }
+  }
+  return bits;
+}
+
+/// Convert integer to half-precision floating point.
+/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest
+/// rounding
+/// \tparam T type to convert (builtin integer type)
+/// \param value integral value
+/// \return binary representation of half-precision value
+template <std::float_round_style R, typename T> uint16 int2half(T value) {
+  return (value < 0) ? int2half_impl<R, true>(value)
+                     : int2half_impl<R, false>(value);
+}
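+// [Editor's note] Illustrative sketch, not part of the original header:
+// integers beyond 65504 (the largest finite half) overflow according to the
+// rounding mode; rounding toward zero clamps to the half maximum:
+//
+//   uint16 h = int2half<std::round_toward_zero>(100000);
+//   // h == 0x7BFF, i.e. 65504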
+/// Convert half-precision to IEEE single-precision.
+/// Credit for this goes to [Jeroen van der
+/// Zijp](ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf).
+/// \param value binary representation of half-precision value
+/// \return single-precision value
+inline float half2float_impl(uint16 value, float, true_type) {
+  typedef bits<float>::type uint32;
+  /* uint32 bits = static_cast<uint32>(value&0x8000) << 16;
+  int abs = value & 0x7FFF;
+  if(abs)
+  {
+    bits |= 0x38000000 << static_cast<unsigned>(abs>=0x7C00);
+    for(; abs<0x400; abs<<=1,bits-=0x800000) ;
+    bits += static_cast<uint32>(abs) << 13;
+  }
+  */
+  static const uint32 mantissa_table[2048] = {
+      0x00000000, 0x33800000, 0x34000000, 0x34400000, 0x34800000, 0x34A00000,
+      0x34C00000, 0x34E00000, 0x35000000, 0x35100000, 0x35200000, 0x35300000,
+      0x35400000, 0x35500000, 0x35600000, 0x35700000, 0x35800000, 0x35880000,
+      0x35900000, 0x35980000, 0x35A00000, 0x35A80000, 0x35B00000, 0x35B80000,
+      0x35C00000, 0x35C80000, 0x35D00000, 0x35D80000, 0x35E00000, 0x35E80000,
+      0x35F00000, 0x35F80000, 0x36000000, 0x36040000, 0x36080000, 0x360C0000,
+      0x36100000, 0x36140000, 0x36180000, 0x361C0000, 0x36200000, 0x36240000,
+      0x36280000, 0x362C0000, 0x36300000, 0x36340000, 0x36380000, 0x363C0000,
+      0x36400000, 0x36440000, 0x36480000, 0x364C0000, 0x36500000, 0x36540000,
+      0x36580000, 0x365C0000, 0x36600000, 0x36640000, 0x36680000, 0x366C0000,
+      0x36700000, 0x36740000, 0x36780000, 0x367C0000, 0x36800000, 0x36820000,
+      0x36840000, 0x36860000, 0x36880000, 0x368A0000, 0x368C0000, 0x368E0000,
+      0x36900000, 0x36920000, 0x36940000, 0x36960000, 0x36980000, 0x369A0000,
+      0x369C0000, 0x369E0000, 0x36A00000, 0x36A20000, 0x36A40000, 0x36A60000,
+      0x36A80000, 0x36AA0000, 0x36AC0000, 0x36AE0000, 0x36B00000, 0x36B20000,
+      0x36B40000, 0x36B60000, 0x36B80000, 0x36BA0000, 0x36BC0000, 0x36BE0000,
+      0x36C00000, 0x36C20000, 0x36C40000, 0x36C60000, 0x36C80000, 0x36CA0000,
+      0x36CC0000, 0x36CE0000, 0x36D00000, 0x36D20000, 0x36D40000, 0x36D60000,
+      0x36D80000, 0x36DA0000, 0x36DC0000, 0x36DE0000, 0x36E00000, 0x36E20000,
+      0x36E40000, 0x36E60000, 0x36E80000, 0x36EA0000, 0x36EC0000, 0x36EE0000,
+      0x36F00000, 0x36F20000, 0x36F40000, 0x36F60000, 0x36F80000, 0x36FA0000,
+      0x36FC0000, 0x36FE0000, 0x37000000, 0x37010000, 0x37020000, 0x37030000,
+      0x37040000, 0x37050000, 0x37060000, 0x37070000, 0x37080000, 0x37090000,
+      0x370A0000, 0x370B0000, 0x370C0000, 0x370D0000, 0x370E0000, 0x370F0000,
+      0x37100000, 0x37110000, 0x37120000, 0x37130000, 0x37140000, 0x37150000,
+      0x37160000, 0x37170000, 0x37180000, 0x37190000, 0x371A0000, 0x371B0000,
+      0x371C0000, 0x371D0000, 0x371E0000, 0x371F0000, 0x37200000, 0x37210000,
+      0x37220000, 0x37230000, 0x37240000, 0x37250000, 0x37260000, 0x37270000,
+      0x37280000, 0x37290000, 0x372A0000, 0x372B0000, 0x372C0000, 0x372D0000,
+      0x372E0000, 0x372F0000, 0x37300000, 0x37310000, 0x37320000, 0x37330000,
+      0x37340000, 0x37350000, 0x37360000, 0x37370000, 0x37380000, 0x37390000,
+      0x373A0000, 0x373B0000, 0x373C0000, 0x373D0000, 0x373E0000, 0x373F0000,
+      0x37400000, 0x37410000, 0x37420000, 0x37430000, 0x37440000, 0x37450000,
+      0x37460000, 0x37470000, 0x37480000, 0x37490000, 0x374A0000, 0x374B0000,
+      0x374C0000, 0x374D0000, 0x374E0000, 0x374F0000, 0x37500000, 0x37510000,
+      0x37520000, 0x37530000, 0x37540000, 0x37550000, 0x37560000, 0x37570000,
+      0x37580000, 0x37590000, 0x375A0000, 0x375B0000, 0x375C0000, 0x375D0000,
+      0x375E0000, 0x375F0000, 0x37600000, 0x37610000, 0x37620000, 0x37630000,
+      0x37640000, 0x37650000, 0x37660000, 0x37670000, 0x37680000, 0x37690000,
+      0x376A0000, 0x376B0000, 0x376C0000, 0x376D0000, 0x376E0000, 0x376F0000,
+      0x37700000, 0x37710000, 0x37720000, 0x37730000, 0x37740000, 0x37750000,
+      0x37760000, 0x37770000, 0x37780000,
0x37790000, 0x377A0000, 0x377B0000, + 0x377C0000, 0x377D0000, 0x377E0000, 0x377F0000, 0x37800000, 0x37808000, + 0x37810000, 0x37818000, 0x37820000, 0x37828000, 0x37830000, 0x37838000, + 0x37840000, 0x37848000, 0x37850000, 0x37858000, 0x37860000, 0x37868000, + 0x37870000, 0x37878000, 0x37880000, 0x37888000, 0x37890000, 0x37898000, + 0x378A0000, 0x378A8000, 0x378B0000, 0x378B8000, 0x378C0000, 0x378C8000, + 0x378D0000, 0x378D8000, 0x378E0000, 0x378E8000, 0x378F0000, 0x378F8000, + 0x37900000, 0x37908000, 0x37910000, 0x37918000, 0x37920000, 0x37928000, + 0x37930000, 0x37938000, 0x37940000, 0x37948000, 0x37950000, 0x37958000, + 0x37960000, 0x37968000, 0x37970000, 0x37978000, 0x37980000, 0x37988000, + 0x37990000, 0x37998000, 0x379A0000, 0x379A8000, 0x379B0000, 0x379B8000, + 0x379C0000, 0x379C8000, 0x379D0000, 0x379D8000, 0x379E0000, 0x379E8000, + 0x379F0000, 0x379F8000, 0x37A00000, 0x37A08000, 0x37A10000, 0x37A18000, + 0x37A20000, 0x37A28000, 0x37A30000, 0x37A38000, 0x37A40000, 0x37A48000, + 0x37A50000, 0x37A58000, 0x37A60000, 0x37A68000, 0x37A70000, 0x37A78000, + 0x37A80000, 0x37A88000, 0x37A90000, 0x37A98000, 0x37AA0000, 0x37AA8000, + 0x37AB0000, 0x37AB8000, 0x37AC0000, 0x37AC8000, 0x37AD0000, 0x37AD8000, + 0x37AE0000, 0x37AE8000, 0x37AF0000, 0x37AF8000, 0x37B00000, 0x37B08000, + 0x37B10000, 0x37B18000, 0x37B20000, 0x37B28000, 0x37B30000, 0x37B38000, + 0x37B40000, 0x37B48000, 0x37B50000, 0x37B58000, 0x37B60000, 0x37B68000, + 0x37B70000, 0x37B78000, 0x37B80000, 0x37B88000, 0x37B90000, 0x37B98000, + 0x37BA0000, 0x37BA8000, 0x37BB0000, 0x37BB8000, 0x37BC0000, 0x37BC8000, + 0x37BD0000, 0x37BD8000, 0x37BE0000, 0x37BE8000, 0x37BF0000, 0x37BF8000, + 0x37C00000, 0x37C08000, 0x37C10000, 0x37C18000, 0x37C20000, 0x37C28000, + 0x37C30000, 0x37C38000, 0x37C40000, 0x37C48000, 0x37C50000, 0x37C58000, + 0x37C60000, 0x37C68000, 0x37C70000, 0x37C78000, 0x37C80000, 0x37C88000, + 0x37C90000, 0x37C98000, 0x37CA0000, 0x37CA8000, 0x37CB0000, 0x37CB8000, + 0x37CC0000, 0x37CC8000, 0x37CD0000, 0x37CD8000, 0x37CE0000, 0x37CE8000, + 0x37CF0000, 0x37CF8000, 0x37D00000, 0x37D08000, 0x37D10000, 0x37D18000, + 0x37D20000, 0x37D28000, 0x37D30000, 0x37D38000, 0x37D40000, 0x37D48000, + 0x37D50000, 0x37D58000, 0x37D60000, 0x37D68000, 0x37D70000, 0x37D78000, + 0x37D80000, 0x37D88000, 0x37D90000, 0x37D98000, 0x37DA0000, 0x37DA8000, + 0x37DB0000, 0x37DB8000, 0x37DC0000, 0x37DC8000, 0x37DD0000, 0x37DD8000, + 0x37DE0000, 0x37DE8000, 0x37DF0000, 0x37DF8000, 0x37E00000, 0x37E08000, + 0x37E10000, 0x37E18000, 0x37E20000, 0x37E28000, 0x37E30000, 0x37E38000, + 0x37E40000, 0x37E48000, 0x37E50000, 0x37E58000, 0x37E60000, 0x37E68000, + 0x37E70000, 0x37E78000, 0x37E80000, 0x37E88000, 0x37E90000, 0x37E98000, + 0x37EA0000, 0x37EA8000, 0x37EB0000, 0x37EB8000, 0x37EC0000, 0x37EC8000, + 0x37ED0000, 0x37ED8000, 0x37EE0000, 0x37EE8000, 0x37EF0000, 0x37EF8000, + 0x37F00000, 0x37F08000, 0x37F10000, 0x37F18000, 0x37F20000, 0x37F28000, + 0x37F30000, 0x37F38000, 0x37F40000, 0x37F48000, 0x37F50000, 0x37F58000, + 0x37F60000, 0x37F68000, 0x37F70000, 0x37F78000, 0x37F80000, 0x37F88000, + 0x37F90000, 0x37F98000, 0x37FA0000, 0x37FA8000, 0x37FB0000, 0x37FB8000, + 0x37FC0000, 0x37FC8000, 0x37FD0000, 0x37FD8000, 0x37FE0000, 0x37FE8000, + 0x37FF0000, 0x37FF8000, 0x38000000, 0x38004000, 0x38008000, 0x3800C000, + 0x38010000, 0x38014000, 0x38018000, 0x3801C000, 0x38020000, 0x38024000, + 0x38028000, 0x3802C000, 0x38030000, 0x38034000, 0x38038000, 0x3803C000, + 0x38040000, 0x38044000, 0x38048000, 0x3804C000, 0x38050000, 0x38054000, + 0x38058000, 0x3805C000, 0x38060000, 
0x38064000, 0x38068000, 0x3806C000, + 0x38070000, 0x38074000, 0x38078000, 0x3807C000, 0x38080000, 0x38084000, + 0x38088000, 0x3808C000, 0x38090000, 0x38094000, 0x38098000, 0x3809C000, + 0x380A0000, 0x380A4000, 0x380A8000, 0x380AC000, 0x380B0000, 0x380B4000, + 0x380B8000, 0x380BC000, 0x380C0000, 0x380C4000, 0x380C8000, 0x380CC000, + 0x380D0000, 0x380D4000, 0x380D8000, 0x380DC000, 0x380E0000, 0x380E4000, + 0x380E8000, 0x380EC000, 0x380F0000, 0x380F4000, 0x380F8000, 0x380FC000, + 0x38100000, 0x38104000, 0x38108000, 0x3810C000, 0x38110000, 0x38114000, + 0x38118000, 0x3811C000, 0x38120000, 0x38124000, 0x38128000, 0x3812C000, + 0x38130000, 0x38134000, 0x38138000, 0x3813C000, 0x38140000, 0x38144000, + 0x38148000, 0x3814C000, 0x38150000, 0x38154000, 0x38158000, 0x3815C000, + 0x38160000, 0x38164000, 0x38168000, 0x3816C000, 0x38170000, 0x38174000, + 0x38178000, 0x3817C000, 0x38180000, 0x38184000, 0x38188000, 0x3818C000, + 0x38190000, 0x38194000, 0x38198000, 0x3819C000, 0x381A0000, 0x381A4000, + 0x381A8000, 0x381AC000, 0x381B0000, 0x381B4000, 0x381B8000, 0x381BC000, + 0x381C0000, 0x381C4000, 0x381C8000, 0x381CC000, 0x381D0000, 0x381D4000, + 0x381D8000, 0x381DC000, 0x381E0000, 0x381E4000, 0x381E8000, 0x381EC000, + 0x381F0000, 0x381F4000, 0x381F8000, 0x381FC000, 0x38200000, 0x38204000, + 0x38208000, 0x3820C000, 0x38210000, 0x38214000, 0x38218000, 0x3821C000, + 0x38220000, 0x38224000, 0x38228000, 0x3822C000, 0x38230000, 0x38234000, + 0x38238000, 0x3823C000, 0x38240000, 0x38244000, 0x38248000, 0x3824C000, + 0x38250000, 0x38254000, 0x38258000, 0x3825C000, 0x38260000, 0x38264000, + 0x38268000, 0x3826C000, 0x38270000, 0x38274000, 0x38278000, 0x3827C000, + 0x38280000, 0x38284000, 0x38288000, 0x3828C000, 0x38290000, 0x38294000, + 0x38298000, 0x3829C000, 0x382A0000, 0x382A4000, 0x382A8000, 0x382AC000, + 0x382B0000, 0x382B4000, 0x382B8000, 0x382BC000, 0x382C0000, 0x382C4000, + 0x382C8000, 0x382CC000, 0x382D0000, 0x382D4000, 0x382D8000, 0x382DC000, + 0x382E0000, 0x382E4000, 0x382E8000, 0x382EC000, 0x382F0000, 0x382F4000, + 0x382F8000, 0x382FC000, 0x38300000, 0x38304000, 0x38308000, 0x3830C000, + 0x38310000, 0x38314000, 0x38318000, 0x3831C000, 0x38320000, 0x38324000, + 0x38328000, 0x3832C000, 0x38330000, 0x38334000, 0x38338000, 0x3833C000, + 0x38340000, 0x38344000, 0x38348000, 0x3834C000, 0x38350000, 0x38354000, + 0x38358000, 0x3835C000, 0x38360000, 0x38364000, 0x38368000, 0x3836C000, + 0x38370000, 0x38374000, 0x38378000, 0x3837C000, 0x38380000, 0x38384000, + 0x38388000, 0x3838C000, 0x38390000, 0x38394000, 0x38398000, 0x3839C000, + 0x383A0000, 0x383A4000, 0x383A8000, 0x383AC000, 0x383B0000, 0x383B4000, + 0x383B8000, 0x383BC000, 0x383C0000, 0x383C4000, 0x383C8000, 0x383CC000, + 0x383D0000, 0x383D4000, 0x383D8000, 0x383DC000, 0x383E0000, 0x383E4000, + 0x383E8000, 0x383EC000, 0x383F0000, 0x383F4000, 0x383F8000, 0x383FC000, + 0x38400000, 0x38404000, 0x38408000, 0x3840C000, 0x38410000, 0x38414000, + 0x38418000, 0x3841C000, 0x38420000, 0x38424000, 0x38428000, 0x3842C000, + 0x38430000, 0x38434000, 0x38438000, 0x3843C000, 0x38440000, 0x38444000, + 0x38448000, 0x3844C000, 0x38450000, 0x38454000, 0x38458000, 0x3845C000, + 0x38460000, 0x38464000, 0x38468000, 0x3846C000, 0x38470000, 0x38474000, + 0x38478000, 0x3847C000, 0x38480000, 0x38484000, 0x38488000, 0x3848C000, + 0x38490000, 0x38494000, 0x38498000, 0x3849C000, 0x384A0000, 0x384A4000, + 0x384A8000, 0x384AC000, 0x384B0000, 0x384B4000, 0x384B8000, 0x384BC000, + 0x384C0000, 0x384C4000, 0x384C8000, 0x384CC000, 0x384D0000, 0x384D4000, + 0x384D8000, 0x384DC000, 0x384E0000, 
0x384E4000, 0x384E8000, 0x384EC000, + 0x384F0000, 0x384F4000, 0x384F8000, 0x384FC000, 0x38500000, 0x38504000, + 0x38508000, 0x3850C000, 0x38510000, 0x38514000, 0x38518000, 0x3851C000, + 0x38520000, 0x38524000, 0x38528000, 0x3852C000, 0x38530000, 0x38534000, + 0x38538000, 0x3853C000, 0x38540000, 0x38544000, 0x38548000, 0x3854C000, + 0x38550000, 0x38554000, 0x38558000, 0x3855C000, 0x38560000, 0x38564000, + 0x38568000, 0x3856C000, 0x38570000, 0x38574000, 0x38578000, 0x3857C000, + 0x38580000, 0x38584000, 0x38588000, 0x3858C000, 0x38590000, 0x38594000, + 0x38598000, 0x3859C000, 0x385A0000, 0x385A4000, 0x385A8000, 0x385AC000, + 0x385B0000, 0x385B4000, 0x385B8000, 0x385BC000, 0x385C0000, 0x385C4000, + 0x385C8000, 0x385CC000, 0x385D0000, 0x385D4000, 0x385D8000, 0x385DC000, + 0x385E0000, 0x385E4000, 0x385E8000, 0x385EC000, 0x385F0000, 0x385F4000, + 0x385F8000, 0x385FC000, 0x38600000, 0x38604000, 0x38608000, 0x3860C000, + 0x38610000, 0x38614000, 0x38618000, 0x3861C000, 0x38620000, 0x38624000, + 0x38628000, 0x3862C000, 0x38630000, 0x38634000, 0x38638000, 0x3863C000, + 0x38640000, 0x38644000, 0x38648000, 0x3864C000, 0x38650000, 0x38654000, + 0x38658000, 0x3865C000, 0x38660000, 0x38664000, 0x38668000, 0x3866C000, + 0x38670000, 0x38674000, 0x38678000, 0x3867C000, 0x38680000, 0x38684000, + 0x38688000, 0x3868C000, 0x38690000, 0x38694000, 0x38698000, 0x3869C000, + 0x386A0000, 0x386A4000, 0x386A8000, 0x386AC000, 0x386B0000, 0x386B4000, + 0x386B8000, 0x386BC000, 0x386C0000, 0x386C4000, 0x386C8000, 0x386CC000, + 0x386D0000, 0x386D4000, 0x386D8000, 0x386DC000, 0x386E0000, 0x386E4000, + 0x386E8000, 0x386EC000, 0x386F0000, 0x386F4000, 0x386F8000, 0x386FC000, + 0x38700000, 0x38704000, 0x38708000, 0x3870C000, 0x38710000, 0x38714000, + 0x38718000, 0x3871C000, 0x38720000, 0x38724000, 0x38728000, 0x3872C000, + 0x38730000, 0x38734000, 0x38738000, 0x3873C000, 0x38740000, 0x38744000, + 0x38748000, 0x3874C000, 0x38750000, 0x38754000, 0x38758000, 0x3875C000, + 0x38760000, 0x38764000, 0x38768000, 0x3876C000, 0x38770000, 0x38774000, + 0x38778000, 0x3877C000, 0x38780000, 0x38784000, 0x38788000, 0x3878C000, + 0x38790000, 0x38794000, 0x38798000, 0x3879C000, 0x387A0000, 0x387A4000, + 0x387A8000, 0x387AC000, 0x387B0000, 0x387B4000, 0x387B8000, 0x387BC000, + 0x387C0000, 0x387C4000, 0x387C8000, 0x387CC000, 0x387D0000, 0x387D4000, + 0x387D8000, 0x387DC000, 0x387E0000, 0x387E4000, 0x387E8000, 0x387EC000, + 0x387F0000, 0x387F4000, 0x387F8000, 0x387FC000, 0x38000000, 0x38002000, + 0x38004000, 0x38006000, 0x38008000, 0x3800A000, 0x3800C000, 0x3800E000, + 0x38010000, 0x38012000, 0x38014000, 0x38016000, 0x38018000, 0x3801A000, + 0x3801C000, 0x3801E000, 0x38020000, 0x38022000, 0x38024000, 0x38026000, + 0x38028000, 0x3802A000, 0x3802C000, 0x3802E000, 0x38030000, 0x38032000, + 0x38034000, 0x38036000, 0x38038000, 0x3803A000, 0x3803C000, 0x3803E000, + 0x38040000, 0x38042000, 0x38044000, 0x38046000, 0x38048000, 0x3804A000, + 0x3804C000, 0x3804E000, 0x38050000, 0x38052000, 0x38054000, 0x38056000, + 0x38058000, 0x3805A000, 0x3805C000, 0x3805E000, 0x38060000, 0x38062000, + 0x38064000, 0x38066000, 0x38068000, 0x3806A000, 0x3806C000, 0x3806E000, + 0x38070000, 0x38072000, 0x38074000, 0x38076000, 0x38078000, 0x3807A000, + 0x3807C000, 0x3807E000, 0x38080000, 0x38082000, 0x38084000, 0x38086000, + 0x38088000, 0x3808A000, 0x3808C000, 0x3808E000, 0x38090000, 0x38092000, + 0x38094000, 0x38096000, 0x38098000, 0x3809A000, 0x3809C000, 0x3809E000, + 0x380A0000, 0x380A2000, 0x380A4000, 0x380A6000, 0x380A8000, 0x380AA000, + 0x380AC000, 0x380AE000, 0x380B0000, 
0x380B2000, 0x380B4000, 0x380B6000, + 0x380B8000, 0x380BA000, 0x380BC000, 0x380BE000, 0x380C0000, 0x380C2000, + 0x380C4000, 0x380C6000, 0x380C8000, 0x380CA000, 0x380CC000, 0x380CE000, + 0x380D0000, 0x380D2000, 0x380D4000, 0x380D6000, 0x380D8000, 0x380DA000, + 0x380DC000, 0x380DE000, 0x380E0000, 0x380E2000, 0x380E4000, 0x380E6000, + 0x380E8000, 0x380EA000, 0x380EC000, 0x380EE000, 0x380F0000, 0x380F2000, + 0x380F4000, 0x380F6000, 0x380F8000, 0x380FA000, 0x380FC000, 0x380FE000, + 0x38100000, 0x38102000, 0x38104000, 0x38106000, 0x38108000, 0x3810A000, + 0x3810C000, 0x3810E000, 0x38110000, 0x38112000, 0x38114000, 0x38116000, + 0x38118000, 0x3811A000, 0x3811C000, 0x3811E000, 0x38120000, 0x38122000, + 0x38124000, 0x38126000, 0x38128000, 0x3812A000, 0x3812C000, 0x3812E000, + 0x38130000, 0x38132000, 0x38134000, 0x38136000, 0x38138000, 0x3813A000, + 0x3813C000, 0x3813E000, 0x38140000, 0x38142000, 0x38144000, 0x38146000, + 0x38148000, 0x3814A000, 0x3814C000, 0x3814E000, 0x38150000, 0x38152000, + 0x38154000, 0x38156000, 0x38158000, 0x3815A000, 0x3815C000, 0x3815E000, + 0x38160000, 0x38162000, 0x38164000, 0x38166000, 0x38168000, 0x3816A000, + 0x3816C000, 0x3816E000, 0x38170000, 0x38172000, 0x38174000, 0x38176000, + 0x38178000, 0x3817A000, 0x3817C000, 0x3817E000, 0x38180000, 0x38182000, + 0x38184000, 0x38186000, 0x38188000, 0x3818A000, 0x3818C000, 0x3818E000, + 0x38190000, 0x38192000, 0x38194000, 0x38196000, 0x38198000, 0x3819A000, + 0x3819C000, 0x3819E000, 0x381A0000, 0x381A2000, 0x381A4000, 0x381A6000, + 0x381A8000, 0x381AA000, 0x381AC000, 0x381AE000, 0x381B0000, 0x381B2000, + 0x381B4000, 0x381B6000, 0x381B8000, 0x381BA000, 0x381BC000, 0x381BE000, + 0x381C0000, 0x381C2000, 0x381C4000, 0x381C6000, 0x381C8000, 0x381CA000, + 0x381CC000, 0x381CE000, 0x381D0000, 0x381D2000, 0x381D4000, 0x381D6000, + 0x381D8000, 0x381DA000, 0x381DC000, 0x381DE000, 0x381E0000, 0x381E2000, + 0x381E4000, 0x381E6000, 0x381E8000, 0x381EA000, 0x381EC000, 0x381EE000, + 0x381F0000, 0x381F2000, 0x381F4000, 0x381F6000, 0x381F8000, 0x381FA000, + 0x381FC000, 0x381FE000, 0x38200000, 0x38202000, 0x38204000, 0x38206000, + 0x38208000, 0x3820A000, 0x3820C000, 0x3820E000, 0x38210000, 0x38212000, + 0x38214000, 0x38216000, 0x38218000, 0x3821A000, 0x3821C000, 0x3821E000, + 0x38220000, 0x38222000, 0x38224000, 0x38226000, 0x38228000, 0x3822A000, + 0x3822C000, 0x3822E000, 0x38230000, 0x38232000, 0x38234000, 0x38236000, + 0x38238000, 0x3823A000, 0x3823C000, 0x3823E000, 0x38240000, 0x38242000, + 0x38244000, 0x38246000, 0x38248000, 0x3824A000, 0x3824C000, 0x3824E000, + 0x38250000, 0x38252000, 0x38254000, 0x38256000, 0x38258000, 0x3825A000, + 0x3825C000, 0x3825E000, 0x38260000, 0x38262000, 0x38264000, 0x38266000, + 0x38268000, 0x3826A000, 0x3826C000, 0x3826E000, 0x38270000, 0x38272000, + 0x38274000, 0x38276000, 0x38278000, 0x3827A000, 0x3827C000, 0x3827E000, + 0x38280000, 0x38282000, 0x38284000, 0x38286000, 0x38288000, 0x3828A000, + 0x3828C000, 0x3828E000, 0x38290000, 0x38292000, 0x38294000, 0x38296000, + 0x38298000, 0x3829A000, 0x3829C000, 0x3829E000, 0x382A0000, 0x382A2000, + 0x382A4000, 0x382A6000, 0x382A8000, 0x382AA000, 0x382AC000, 0x382AE000, + 0x382B0000, 0x382B2000, 0x382B4000, 0x382B6000, 0x382B8000, 0x382BA000, + 0x382BC000, 0x382BE000, 0x382C0000, 0x382C2000, 0x382C4000, 0x382C6000, + 0x382C8000, 0x382CA000, 0x382CC000, 0x382CE000, 0x382D0000, 0x382D2000, + 0x382D4000, 0x382D6000, 0x382D8000, 0x382DA000, 0x382DC000, 0x382DE000, + 0x382E0000, 0x382E2000, 0x382E4000, 0x382E6000, 0x382E8000, 0x382EA000, + 0x382EC000, 0x382EE000, 0x382F0000, 
0x382F2000, 0x382F4000, 0x382F6000, + 0x382F8000, 0x382FA000, 0x382FC000, 0x382FE000, 0x38300000, 0x38302000, + 0x38304000, 0x38306000, 0x38308000, 0x3830A000, 0x3830C000, 0x3830E000, + 0x38310000, 0x38312000, 0x38314000, 0x38316000, 0x38318000, 0x3831A000, + 0x3831C000, 0x3831E000, 0x38320000, 0x38322000, 0x38324000, 0x38326000, + 0x38328000, 0x3832A000, 0x3832C000, 0x3832E000, 0x38330000, 0x38332000, + 0x38334000, 0x38336000, 0x38338000, 0x3833A000, 0x3833C000, 0x3833E000, + 0x38340000, 0x38342000, 0x38344000, 0x38346000, 0x38348000, 0x3834A000, + 0x3834C000, 0x3834E000, 0x38350000, 0x38352000, 0x38354000, 0x38356000, + 0x38358000, 0x3835A000, 0x3835C000, 0x3835E000, 0x38360000, 0x38362000, + 0x38364000, 0x38366000, 0x38368000, 0x3836A000, 0x3836C000, 0x3836E000, + 0x38370000, 0x38372000, 0x38374000, 0x38376000, 0x38378000, 0x3837A000, + 0x3837C000, 0x3837E000, 0x38380000, 0x38382000, 0x38384000, 0x38386000, + 0x38388000, 0x3838A000, 0x3838C000, 0x3838E000, 0x38390000, 0x38392000, + 0x38394000, 0x38396000, 0x38398000, 0x3839A000, 0x3839C000, 0x3839E000, + 0x383A0000, 0x383A2000, 0x383A4000, 0x383A6000, 0x383A8000, 0x383AA000, + 0x383AC000, 0x383AE000, 0x383B0000, 0x383B2000, 0x383B4000, 0x383B6000, + 0x383B8000, 0x383BA000, 0x383BC000, 0x383BE000, 0x383C0000, 0x383C2000, + 0x383C4000, 0x383C6000, 0x383C8000, 0x383CA000, 0x383CC000, 0x383CE000, + 0x383D0000, 0x383D2000, 0x383D4000, 0x383D6000, 0x383D8000, 0x383DA000, + 0x383DC000, 0x383DE000, 0x383E0000, 0x383E2000, 0x383E4000, 0x383E6000, + 0x383E8000, 0x383EA000, 0x383EC000, 0x383EE000, 0x383F0000, 0x383F2000, + 0x383F4000, 0x383F6000, 0x383F8000, 0x383FA000, 0x383FC000, 0x383FE000, + 0x38400000, 0x38402000, 0x38404000, 0x38406000, 0x38408000, 0x3840A000, + 0x3840C000, 0x3840E000, 0x38410000, 0x38412000, 0x38414000, 0x38416000, + 0x38418000, 0x3841A000, 0x3841C000, 0x3841E000, 0x38420000, 0x38422000, + 0x38424000, 0x38426000, 0x38428000, 0x3842A000, 0x3842C000, 0x3842E000, + 0x38430000, 0x38432000, 0x38434000, 0x38436000, 0x38438000, 0x3843A000, + 0x3843C000, 0x3843E000, 0x38440000, 0x38442000, 0x38444000, 0x38446000, + 0x38448000, 0x3844A000, 0x3844C000, 0x3844E000, 0x38450000, 0x38452000, + 0x38454000, 0x38456000, 0x38458000, 0x3845A000, 0x3845C000, 0x3845E000, + 0x38460000, 0x38462000, 0x38464000, 0x38466000, 0x38468000, 0x3846A000, + 0x3846C000, 0x3846E000, 0x38470000, 0x38472000, 0x38474000, 0x38476000, + 0x38478000, 0x3847A000, 0x3847C000, 0x3847E000, 0x38480000, 0x38482000, + 0x38484000, 0x38486000, 0x38488000, 0x3848A000, 0x3848C000, 0x3848E000, + 0x38490000, 0x38492000, 0x38494000, 0x38496000, 0x38498000, 0x3849A000, + 0x3849C000, 0x3849E000, 0x384A0000, 0x384A2000, 0x384A4000, 0x384A6000, + 0x384A8000, 0x384AA000, 0x384AC000, 0x384AE000, 0x384B0000, 0x384B2000, + 0x384B4000, 0x384B6000, 0x384B8000, 0x384BA000, 0x384BC000, 0x384BE000, + 0x384C0000, 0x384C2000, 0x384C4000, 0x384C6000, 0x384C8000, 0x384CA000, + 0x384CC000, 0x384CE000, 0x384D0000, 0x384D2000, 0x384D4000, 0x384D6000, + 0x384D8000, 0x384DA000, 0x384DC000, 0x384DE000, 0x384E0000, 0x384E2000, + 0x384E4000, 0x384E6000, 0x384E8000, 0x384EA000, 0x384EC000, 0x384EE000, + 0x384F0000, 0x384F2000, 0x384F4000, 0x384F6000, 0x384F8000, 0x384FA000, + 0x384FC000, 0x384FE000, 0x38500000, 0x38502000, 0x38504000, 0x38506000, + 0x38508000, 0x3850A000, 0x3850C000, 0x3850E000, 0x38510000, 0x38512000, + 0x38514000, 0x38516000, 0x38518000, 0x3851A000, 0x3851C000, 0x3851E000, + 0x38520000, 0x38522000, 0x38524000, 0x38526000, 0x38528000, 0x3852A000, + 0x3852C000, 0x3852E000, 0x38530000, 
0x38532000, 0x38534000, 0x38536000, + 0x38538000, 0x3853A000, 0x3853C000, 0x3853E000, 0x38540000, 0x38542000, + 0x38544000, 0x38546000, 0x38548000, 0x3854A000, 0x3854C000, 0x3854E000, + 0x38550000, 0x38552000, 0x38554000, 0x38556000, 0x38558000, 0x3855A000, + 0x3855C000, 0x3855E000, 0x38560000, 0x38562000, 0x38564000, 0x38566000, + 0x38568000, 0x3856A000, 0x3856C000, 0x3856E000, 0x38570000, 0x38572000, + 0x38574000, 0x38576000, 0x38578000, 0x3857A000, 0x3857C000, 0x3857E000, + 0x38580000, 0x38582000, 0x38584000, 0x38586000, 0x38588000, 0x3858A000, + 0x3858C000, 0x3858E000, 0x38590000, 0x38592000, 0x38594000, 0x38596000, + 0x38598000, 0x3859A000, 0x3859C000, 0x3859E000, 0x385A0000, 0x385A2000, + 0x385A4000, 0x385A6000, 0x385A8000, 0x385AA000, 0x385AC000, 0x385AE000, + 0x385B0000, 0x385B2000, 0x385B4000, 0x385B6000, 0x385B8000, 0x385BA000, + 0x385BC000, 0x385BE000, 0x385C0000, 0x385C2000, 0x385C4000, 0x385C6000, + 0x385C8000, 0x385CA000, 0x385CC000, 0x385CE000, 0x385D0000, 0x385D2000, + 0x385D4000, 0x385D6000, 0x385D8000, 0x385DA000, 0x385DC000, 0x385DE000, + 0x385E0000, 0x385E2000, 0x385E4000, 0x385E6000, 0x385E8000, 0x385EA000, + 0x385EC000, 0x385EE000, 0x385F0000, 0x385F2000, 0x385F4000, 0x385F6000, + 0x385F8000, 0x385FA000, 0x385FC000, 0x385FE000, 0x38600000, 0x38602000, + 0x38604000, 0x38606000, 0x38608000, 0x3860A000, 0x3860C000, 0x3860E000, + 0x38610000, 0x38612000, 0x38614000, 0x38616000, 0x38618000, 0x3861A000, + 0x3861C000, 0x3861E000, 0x38620000, 0x38622000, 0x38624000, 0x38626000, + 0x38628000, 0x3862A000, 0x3862C000, 0x3862E000, 0x38630000, 0x38632000, + 0x38634000, 0x38636000, 0x38638000, 0x3863A000, 0x3863C000, 0x3863E000, + 0x38640000, 0x38642000, 0x38644000, 0x38646000, 0x38648000, 0x3864A000, + 0x3864C000, 0x3864E000, 0x38650000, 0x38652000, 0x38654000, 0x38656000, + 0x38658000, 0x3865A000, 0x3865C000, 0x3865E000, 0x38660000, 0x38662000, + 0x38664000, 0x38666000, 0x38668000, 0x3866A000, 0x3866C000, 0x3866E000, + 0x38670000, 0x38672000, 0x38674000, 0x38676000, 0x38678000, 0x3867A000, + 0x3867C000, 0x3867E000, 0x38680000, 0x38682000, 0x38684000, 0x38686000, + 0x38688000, 0x3868A000, 0x3868C000, 0x3868E000, 0x38690000, 0x38692000, + 0x38694000, 0x38696000, 0x38698000, 0x3869A000, 0x3869C000, 0x3869E000, + 0x386A0000, 0x386A2000, 0x386A4000, 0x386A6000, 0x386A8000, 0x386AA000, + 0x386AC000, 0x386AE000, 0x386B0000, 0x386B2000, 0x386B4000, 0x386B6000, + 0x386B8000, 0x386BA000, 0x386BC000, 0x386BE000, 0x386C0000, 0x386C2000, + 0x386C4000, 0x386C6000, 0x386C8000, 0x386CA000, 0x386CC000, 0x386CE000, + 0x386D0000, 0x386D2000, 0x386D4000, 0x386D6000, 0x386D8000, 0x386DA000, + 0x386DC000, 0x386DE000, 0x386E0000, 0x386E2000, 0x386E4000, 0x386E6000, + 0x386E8000, 0x386EA000, 0x386EC000, 0x386EE000, 0x386F0000, 0x386F2000, + 0x386F4000, 0x386F6000, 0x386F8000, 0x386FA000, 0x386FC000, 0x386FE000, + 0x38700000, 0x38702000, 0x38704000, 0x38706000, 0x38708000, 0x3870A000, + 0x3870C000, 0x3870E000, 0x38710000, 0x38712000, 0x38714000, 0x38716000, + 0x38718000, 0x3871A000, 0x3871C000, 0x3871E000, 0x38720000, 0x38722000, + 0x38724000, 0x38726000, 0x38728000, 0x3872A000, 0x3872C000, 0x3872E000, + 0x38730000, 0x38732000, 0x38734000, 0x38736000, 0x38738000, 0x3873A000, + 0x3873C000, 0x3873E000, 0x38740000, 0x38742000, 0x38744000, 0x38746000, + 0x38748000, 0x3874A000, 0x3874C000, 0x3874E000, 0x38750000, 0x38752000, + 0x38754000, 0x38756000, 0x38758000, 0x3875A000, 0x3875C000, 0x3875E000, + 0x38760000, 0x38762000, 0x38764000, 0x38766000, 0x38768000, 0x3876A000, + 0x3876C000, 0x3876E000, 0x38770000, 
0x38772000, 0x38774000, 0x38776000,
+      0x38778000, 0x3877A000, 0x3877C000, 0x3877E000, 0x38780000, 0x38782000,
+      0x38784000, 0x38786000, 0x38788000, 0x3878A000, 0x3878C000, 0x3878E000,
+      0x38790000, 0x38792000, 0x38794000, 0x38796000, 0x38798000, 0x3879A000,
+      0x3879C000, 0x3879E000, 0x387A0000, 0x387A2000, 0x387A4000, 0x387A6000,
+      0x387A8000, 0x387AA000, 0x387AC000, 0x387AE000, 0x387B0000, 0x387B2000,
+      0x387B4000, 0x387B6000, 0x387B8000, 0x387BA000, 0x387BC000, 0x387BE000,
+      0x387C0000, 0x387C2000, 0x387C4000, 0x387C6000, 0x387C8000, 0x387CA000,
+      0x387CC000, 0x387CE000, 0x387D0000, 0x387D2000, 0x387D4000, 0x387D6000,
+      0x387D8000, 0x387DA000, 0x387DC000, 0x387DE000, 0x387E0000, 0x387E2000,
+      0x387E4000, 0x387E6000, 0x387E8000, 0x387EA000, 0x387EC000, 0x387EE000,
+      0x387F0000, 0x387F2000, 0x387F4000, 0x387F6000, 0x387F8000, 0x387FA000,
+      0x387FC000, 0x387FE000};
+  static const uint32 exponent_table[64] = {
+      0x00000000, 0x00800000, 0x01000000, 0x01800000, 0x02000000, 0x02800000,
+      0x03000000, 0x03800000, 0x04000000, 0x04800000, 0x05000000, 0x05800000,
+      0x06000000, 0x06800000, 0x07000000, 0x07800000, 0x08000000, 0x08800000,
+      0x09000000, 0x09800000, 0x0A000000, 0x0A800000, 0x0B000000, 0x0B800000,
+      0x0C000000, 0x0C800000, 0x0D000000, 0x0D800000, 0x0E000000, 0x0E800000,
+      0x0F000000, 0x47800000, 0x80000000, 0x80800000, 0x81000000, 0x81800000,
+      0x82000000, 0x82800000, 0x83000000, 0x83800000, 0x84000000, 0x84800000,
+      0x85000000, 0x85800000, 0x86000000, 0x86800000, 0x87000000, 0x87800000,
+      0x88000000, 0x88800000, 0x89000000, 0x89800000, 0x8A000000, 0x8A800000,
+      0x8B000000, 0x8B800000, 0x8C000000, 0x8C800000, 0x8D000000, 0x8D800000,
+      0x8E000000, 0x8E800000, 0x8F000000, 0xC7800000};
+  static const unsigned short offset_table[64] = {
+      0,    1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024,
+      1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024,
+      1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 0,
+      1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024,
+      1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024,
+      1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024};
+  uint32 bits = mantissa_table[offset_table[value >> 10] + (value & 0x3FF)] +
+                exponent_table[value >> 10];
+  // return *reinterpret_cast<float*>(&bits);
+  // violating strict aliasing!
+  float out;
+  std::memcpy(&out, &bits, sizeof(float));
+  return out;
+}
+
+/// Convert half-precision to IEEE double-precision.
+/// \param value binary representation of half-precision value
+/// \return double-precision value
+inline double half2float_impl(uint16 value, double, true_type) {
+  typedef bits<float>::type uint32;
+  typedef bits<double>::type uint64;
+  uint32 hi = static_cast<uint32>(value & 0x8000) << 16;
+  int abs = value & 0x7FFF;
+  if (abs) {
+    hi |= 0x3F000000 << static_cast<unsigned>(abs >= 0x7C00);
+    for (; abs < 0x400; abs <<= 1, hi -= 0x100000)
+      ;
+    hi += static_cast<uint32>(abs) << 10;
+  }
+  uint64 bits = static_cast<uint64>(hi) << 32;
+  // return *reinterpret_cast<double*>(&bits);
+  // violating strict aliasing!
+  double out;
+  std::memcpy(&out, &bits, sizeof(double));
+  return out;
+}
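+// [Editor's note] Illustrative sketch, not part of the original header: the
+// table lookup above decodes e.g. half 0x3C00 (one) through offset_table[15]
+// and exponent_table[15] as 0x38000000 + 0x07800000 == 0x3F800000, the
+// single-precision pattern of 1.0f:
+//
+//   float f = half2float_impl(uint16(0x3C00), 0.0f, true_type());
+//   // f == 1.0f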
+/// Convert half-precision to non-IEEE floating point.
+/// \tparam T type to convert to (builtin floating point type)
+/// \param value binary representation of half-precision value
+/// \return floating point value
+template <typename T> T half2float_impl(uint16 value, T, ...) {
+  T out;
+  int abs = value & 0x7FFF;
+  if (abs > 0x7C00)
+    out = std::numeric_limits<T>::has_quiet_NaN
+              ? std::numeric_limits<T>::quiet_NaN()
+              : T();
+  else if (abs == 0x7C00)
+    out = std::numeric_limits<T>::has_infinity
+              ? std::numeric_limits<T>::infinity()
+              : std::numeric_limits<T>::max();
+  else if (abs > 0x3FF)
+    out = std::ldexp(static_cast<T>((abs & 0x3FF) | 0x400), (abs >> 10) - 25);
+  else
+    out = std::ldexp(static_cast<T>(abs), -24);
+  return (value & 0x8000) ? -out : out;
+}
+
+/// Convert half-precision to floating point.
+/// \tparam T type to convert to (builtin floating point type)
+/// \param value binary representation of half-precision value
+/// \return floating point value
+template <typename T> T half2float(uint16 value) {
+  return half2float_impl(value, T(),
+                         bool_type<std::numeric_limits<T>::is_iec559 &&
+                                   sizeof(typename bits<T>::type) ==
+                                       sizeof(T)>());
+}
+
+/// Convert half-precision floating point to integer.
+/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest
+/// rounding
+/// \tparam E `true` for round to even, `false` for round away from zero
+/// \tparam T type to convert to (builtin integer type with at least 16 bits
+/// precision, excluding any implicit sign bits)
+/// \param value binary representation of half-precision value
+/// \return integral value
+template <std::float_round_style R, bool E, typename T>
+T half2int_impl(uint16 value) {
+#if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS
+  static_assert(std::is_integral<T>::value,
+                "half to int conversion only supports builtin integer types");
+#endif
+  uint32_t e = value & 0x7FFF;
+  if (e >= 0x7C00)
+    return (value & 0x8000) ? std::numeric_limits<T>::min()
+                            : std::numeric_limits<T>::max();
+  if (e < 0x3800) {
+    if (R == std::round_toward_infinity)
+      return T(~(value >> 15) & (e != 0));
+    else if (R == std::round_toward_neg_infinity)
+      return -T(value > 0x8000);
+    return T();
+  }
+  uint32_t m = (value & 0x3FF) | 0x400;
+  e >>= 10;
+  if (e < 25) {
+    if (R == std::round_to_nearest)
+      m += (1 << (24 - e)) - (~(m >> (25 - e)) & E);
+    else if (R == std::round_toward_infinity)
+      m += ((value >> 15) - 1) & ((1 << (25 - e)) - 1U);
+    else if (R == std::round_toward_neg_infinity)
+      m += -(value >> 15) & ((1 << (25 - e)) - 1U);
+    m >>= 25 - e;
+  } else
+    m <<= e - 25;
+  return (value & 0x8000) ? -static_cast<T>(m) : static_cast<T>(m);
+}
+
+/// Convert half-precision floating point to integer.
+/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest
+/// rounding
+/// \tparam T type to convert to (builtin integer type with at least 16 bits
+/// precision, excluding any implicit sign bits)
+/// \param value binary representation of half-precision value
+/// \return integral value
+template <std::float_round_style R, typename T> T half2int(uint16 value) {
+  return half2int_impl<R, HALF_ROUND_TIES_TO_EVEN, T>(value);
+}
+
+/// Convert half-precision floating point to integer using
+/// round-to-nearest-away-from-zero.
+/// \tparam T type to convert to (builtin integer type with at least 16 bits
+/// precision, excluding any implicit sign bits)
+/// \param value binary representation of half-precision value
+/// \return integral value
+template <typename T> T half2int_up(uint16 value) {
+  return half2int_impl<std::round_to_nearest, 0, T>(value);
+}
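+// [Editor's note] Illustrative sketch, not part of the original header:
+// 2.5 has the half pattern 0x4100; half2int_up rounds ties away from zero,
+// while rounding toward zero simply drops the fraction:
+//
+//   int a = half2int_up<int>(0x4100);                      // 3
+//   int b = half2int<std::round_toward_zero, int>(0x4100); // 2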
+/// Round half-precision number to nearest integer value.
+/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest
+/// rounding
+/// \tparam E `true` for round to even, `false` for round away from zero
+/// \param value binary representation of half-precision value
+/// \return half-precision bits for nearest integral value
+template <std::float_round_style R, bool E>
+uint16 round_half_impl(uint16 value) {
+  uint32_t e = value & 0x7FFF;
+  uint16 result = value;
+  if (e < 0x3C00) {
+    result &= 0x8000;
+    if (R == std::round_to_nearest)
+      result |= 0x3C00U & -(e >= (0x3800 + E));
+    else if (R == std::round_toward_infinity)
+      result |= 0x3C00U & -(~(value >> 15) & (e != 0));
+    else if (R == std::round_toward_neg_infinity)
+      result |= 0x3C00U & -(value > 0x8000);
+  } else if (e < 0x6400) {
+    e = 25 - (e >> 10);
+    uint32_t mask = (1 << e) - 1;
+    if (R == std::round_to_nearest)
+      result += (1 << (e - 1)) - (~(result >> e) & E);
+    else if (R == std::round_toward_infinity)
+      result += mask & ((value >> 15) - 1);
+    else if (R == std::round_toward_neg_infinity)
+      result += mask & -(value >> 15);
+    result &= ~mask;
+  }
+  return result;
+}
+
+/// Round half-precision number to nearest integer value.
+/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest
+/// rounding
+/// \param value binary representation of half-precision value
+/// \return half-precision bits for nearest integral value
+template <std::float_round_style R> uint16 round_half(uint16 value) {
+  return round_half_impl<R, HALF_ROUND_TIES_TO_EVEN>(value);
+}
+
+/// Round half-precision number to nearest integer value using
+/// round-to-nearest-away-from-zero.
+/// \param value binary representation of half-precision value
+/// \return half-precision bits for nearest integral value
+inline uint16 round_half_up(uint16 value) {
+  return round_half_impl<std::round_to_nearest, 0>(value);
+}
+/// \}
+
+struct functions;
+template <typename> struct unary_specialized;
+template <typename, typename> struct binary_specialized;
+template <typename, typename, std::float_round_style> struct half_caster;
+}  // namespace detail
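+// [Editor's note] Illustrative sketch, not part of the original header:
+// detail::round_half_up() rounds the half bit pattern itself, so 2.5
+// (0x4100) becomes 3.0 (0x4200) because ties go away from zero:
+//
+//   detail::uint16 r = detail::round_half_up(0x4100);  // r == 0x4200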
+
+/// Half-precision floating point type.
+/// This class implements an IEEE-conformant half-precision floating point
+/// type with the usual arithmetic operators and conversions. It is implicitly
+/// convertible to single-precision floating point, which causes arithmetic
+/// expressions and functions with mixed-type operands to be of the most
+/// precise operand type. Additionally all arithmetic operations (and many
+/// mathematical functions) are carried out in single-precision internally.
+/// All conversions from single- to half-precision are done using the
+/// library's default rounding mode, but temporary results inside chained
+/// arithmetic expressions are kept in single-precision as long as possible
+/// (while of course still maintaining a strong half-precision type).
+///
+/// According to the C++98/03 definition, the half type is not a POD type.
+/// But according to C++11's less strict and extended definitions it is both
+/// a standard layout type and a trivially copyable type (even if not a POD
+/// type), which means it can be standard-conformantly copied using raw
+/// binary copies. In this context a few more words about the actual size of
+/// the type are in order. Although the half represents an IEEE 16-bit type,
+/// it does not necessarily have to be exactly 16 bits in size. But on any
+/// reasonable implementation the actual binary representation of this type
+/// will most probably not involve any additional "magic" or padding beyond
+/// the simple binary representation of the underlying 16-bit IEEE number,
+/// even if not strictly guaranteed by the standard. Even then it only has an
+/// actual size of 16 bits if your C++ implementation supports an unsigned
+/// integer type of exactly 16 bits width. But this should be the case on
+/// nearly any reasonable platform.
+///
+/// So if your C++ implementation is not totally exotic or imposes special
+/// alignment requirements, it is a reasonable assumption that the data of a
+/// half is just comprised of the 2 bytes of the underlying IEEE
+/// representation.
+class half {
+  friend struct detail::functions;
+  friend struct detail::unary_specialized<half>;
+  friend struct detail::binary_specialized<half, half>;
+  template <typename, typename, std::float_round_style>
+  friend struct detail::half_caster;
+  friend class std::numeric_limits<half>;
+#if HALF_ENABLE_CPP11_HASH
+  friend struct std::hash<half>;
+#endif
+#if HALF_ENABLE_CPP11_USER_LITERALS
+  friend half literal::operator"" _h(long double);
+#endif
+
+ public:
+  /// Default constructor.
+  /// This initializes the half to 0. Although this does not match the
+  /// builtin types' default-initialization semantics and may be less
+  /// efficient than no initialization, it is needed to provide proper
+  /// value-initialization semantics.
+  HALF_CONSTEXPR half() HALF_NOEXCEPT : data_() {}
+
+  /// Copy constructor.
+  /// \tparam T type of concrete half expression
+  /// \param rhs half expression to copy from
+  half(detail::expr rhs)
+      : data_(detail::float2half<round_style>(static_cast<float>(rhs))) {}
+
+  /// Conversion constructor.
+  /// \param rhs float to convert
+  explicit half(float rhs) : data_(detail::float2half<round_style>(rhs)) {}
+
+  /// Conversion to single-precision.
+  /// \return single precision value representing expression value
+  operator float() const { return detail::half2float<float>(data_); }
+
+  /// Assignment operator.
+  /// \tparam T type of concrete half expression
+  /// \param rhs half expression to copy from
+  /// \return reference to this half
+  half& operator=(detail::expr rhs) { return *this = static_cast<float>(rhs); }
+
+  /// Arithmetic assignment.
+  /// \tparam T type of concrete half expression
+  /// \param rhs half expression to add
+  /// \return reference to this half
+  template <typename T>
+  typename detail::enable<half&, T>::type operator+=(T rhs) {
+    return *this += static_cast<float>(rhs);
+  }
+
+  /// Arithmetic assignment.
+  /// \tparam T type of concrete half expression
+  /// \param rhs half expression to subtract
+  /// \return reference to this half
+  template <typename T>
+  typename detail::enable<half&, T>::type operator-=(T rhs) {
+    return *this -= static_cast<float>(rhs);
+  }
+
+  /// Arithmetic assignment.
+  /// \tparam T type of concrete half expression
+  /// \param rhs half expression to multiply with
+  /// \return reference to this half
+  template <typename T>
+  typename detail::enable<half&, T>::type operator*=(T rhs) {
+    return *this *= static_cast<float>(rhs);
+  }
+
+  /// Arithmetic assignment.
+  /// \tparam T type of concrete half expression
+  /// \param rhs half expression to divide by
+  /// \return reference to this half
+  template <typename T>
+  typename detail::enable<half&, T>::type operator/=(T rhs) {
+    return *this /= static_cast<float>(rhs);
+  }
+
+  /// Assignment operator.
+  /// \param rhs single-precision value to copy from
+  /// \return reference to this half
+  half& operator=(float rhs) {
+    data_ = detail::float2half<round_style>(rhs);
+    return *this;
+  }
+
+  /// Arithmetic assignment.
+  /// \param rhs single-precision value to add
+  /// \return reference to this half
+  half& operator+=(float rhs) {
+    data_ =
+        detail::float2half<round_style>(detail::half2float<float>(data_) + rhs);
+    return *this;
+  }
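+  // [Editor's note] Illustrative sketch, not part of the original header:
+  // compound assignment widens to float, operates there, and narrows back
+  // using the library's default rounding mode:
+  //
+  //   half h(1.0f);
+  //   h += 0.5f;    // computed in single precision, stored as half
+  //   float f = h;  // implicit widening, f == 1.5f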
+  /// Arithmetic assignment.
+  /// \param rhs single-precision value to subtract
+  /// \return reference to this half
+  half& operator-=(float rhs) {
+    data_ =
+        detail::float2half<round_style>(detail::half2float<float>(data_) - rhs);
+    return *this;
+  }
+
+  /// Arithmetic assignment.
+  /// \param rhs single-precision value to multiply with
+  /// \return reference to this half
+  half& operator*=(float rhs) {
+    data_ =
+        detail::float2half<round_style>(detail::half2float<float>(data_) * rhs);
+    return *this;
+  }
+
+  /// Arithmetic assignment.
+  /// \param rhs single-precision value to divide by
+  /// \return reference to this half
+  half& operator/=(float rhs) {
+    data_ =
+        detail::float2half<round_style>(detail::half2float<float>(data_) / rhs);
+    return *this;
+  }
+
+  /// Prefix increment.
+  /// \return incremented half value
+  half& operator++() { return *this += 1.0f; }
+
+  /// Prefix decrement.
+  /// \return decremented half value
+  half& operator--() { return *this -= 1.0f; }
+
+  /// Postfix increment.
+  /// \return non-incremented half value
+  half operator++(int) {
+    half out(*this);
+    ++*this;
+    return out;
+  }
+
+  /// Postfix decrement.
+  /// \return non-decremented half value
+  half operator--(int) {
+    half out(*this);
+    --*this;
+    return out;
+  }
+
+ private:
+  /// Rounding mode to use
+  static const std::float_round_style round_style =
+      (std::float_round_style)(HALF_ROUND_STYLE);
+
+  /// Constructor.
+  /// \param bits binary representation to set half to
+  HALF_CONSTEXPR half(detail::binary_t, detail::uint16 bits) HALF_NOEXCEPT
+      : data_(bits) {}
+
+  /// Internal binary representation
+  detail::uint16 data_;
+};
+
+#if HALF_ENABLE_CPP11_USER_LITERALS
+namespace literal {
+/// Half literal.
+/// While this returns an actual half-precision value, half literals can
+/// unfortunately not be constant expressions due to rather involved
+/// conversions.
+/// \param value literal value
+/// \return half with given value (if representable)
+inline half operator"" _h(long double value) {
+  return half(detail::binary, detail::float2half<half::round_style>(value));
+}
+}  // namespace literal
+#endif
+
+namespace detail {
+/// Wrapper implementing unspecialized half-precision functions.
+struct functions {
+  /// Addition implementation.
+  /// \param x first operand
+  /// \param y second operand
+  /// \return Half-precision sum stored in single-precision
+  static expr plus(float x, float y) { return expr(x + y); }
+
+  /// Subtraction implementation.
+  /// \param x first operand
+  /// \param y second operand
+  /// \return Half-precision difference stored in single-precision
+  static expr minus(float x, float y) { return expr(x - y); }
+
+  /// Multiplication implementation.
+  /// \param x first operand
+  /// \param y second operand
+  /// \return Half-precision product stored in single-precision
+  static expr multiplies(float x, float y) { return expr(x * y); }
+
+  /// Division implementation.
+  /// \param x first operand
+  /// \param y second operand
+  /// \return Half-precision quotient stored in single-precision
+  static expr divides(float x, float y) { return expr(x / y); }
+
+  /// Output implementation.
+  /// \param out stream to write to
+  /// \param arg value to write
+  /// \return reference to stream
+  template <typename charT, typename traits>
+  static std::basic_ostream<charT, traits>& write(
+      std::basic_ostream<charT, traits>& out, float arg) {
+    return out << arg;
+  }
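+  // [Editor's note] Illustrative sketch, not part of the original header
+  // (assuming the library's enclosing half_float namespace): with
+  // user-defined literals enabled, halves can be written directly and
+  // stream like any arithmetic type:
+  //
+  //   using namespace half_float::literal;
+  //   half h = 0.25_h;
+  //   std::cout << h;  // prints 0.25 via the float conversion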
+  /// Input implementation.
+  /// \param in stream to read from
+  /// \param arg half to read into
+  /// \return reference to stream
+  template <typename charT, typename traits>
+  static std::basic_istream<charT, traits>& read(
+      std::basic_istream<charT, traits>& in, half& arg) {
+    float f;
+    if (in >> f)
+      arg = f;
+    return in;
+  }
+
+  /// Modulo implementation.
+  /// \param x first operand
+  /// \param y second operand
+  /// \return Half-precision division remainder stored in single-precision
+  static expr fmod(float x, float y) { return expr(std::fmod(x, y)); }
+
+  /// Remainder implementation.
+  /// \param x first operand
+  /// \param y second operand
+  /// \return Half-precision division remainder stored in single-precision
+  static expr remainder(float x, float y) {
+#if HALF_ENABLE_CPP11_CMATH
+    return expr(std::remainder(x, y));
+#else
+    if (builtin_isnan(x) || builtin_isnan(y))
+      return expr(std::numeric_limits<float>::quiet_NaN());
+    float ax = std::fabs(x), ay = std::fabs(y);
+    if (ax >= 65536.0f || ay < std::ldexp(1.0f, -24))
+      return expr(std::numeric_limits<float>::quiet_NaN());
+    if (ay >= 65536.0f)
+      return expr(x);
+    if (ax == ay)
+      return expr(builtin_signbit(x) ? -0.0f : 0.0f);
+    ax = std::fmod(ax, ay + ay);
+    float y2 = 0.5f * ay;
+    if (ax > y2) {
+      ax -= ay;
+      if (ax >= y2)
+        ax -= ay;
+    }
+    return expr(builtin_signbit(x) ? -ax : ax);
+#endif
+  }
+
+  /// Remainder implementation.
+  /// \param x first operand
+  /// \param y second operand
+  /// \param quo address to store quotient bits at
+  /// \return Half-precision division remainder stored in single-precision
+  static expr remquo(float x, float y, int* quo) {
+#if HALF_ENABLE_CPP11_CMATH
+    return expr(std::remquo(x, y, quo));
+#else
+    if (builtin_isnan(x) || builtin_isnan(y))
+      return expr(std::numeric_limits<float>::quiet_NaN());
+    bool sign = builtin_signbit(x),
+         qsign = static_cast<bool>(sign ^ builtin_signbit(y));
+    float ax = std::fabs(x), ay = std::fabs(y);
+    if (ax >= 65536.0f || ay < std::ldexp(1.0f, -24))
+      return expr(std::numeric_limits<float>::quiet_NaN());
+    if (ay >= 65536.0f)
+      return expr(x);
+    if (ax == ay)
+      return *quo = qsign ? -1 : 1, expr(sign ? -0.0f : 0.0f);
+    ax = std::fmod(ax, 8.0f * ay);
+    int cquo = 0;
+    if (ax >= 4.0f * ay) {
+      ax -= 4.0f * ay;
+      cquo += 4;
+    }
+    if (ax >= 2.0f * ay) {
+      ax -= 2.0f * ay;
+      cquo += 2;
+    }
+    float y2 = 0.5f * ay;
+    if (ax > y2) {
+      ax -= ay;
+      ++cquo;
+      if (ax >= y2) {
+        ax -= ay;
+        ++cquo;
+      }
+    }
+    return *quo = qsign ? -cquo : cquo, expr(sign ? -ax : ax);
+#endif
+  }
+
+  /// Positive difference implementation.
+  /// \param x first operand
+  /// \param y second operand
+  /// \return Positive difference stored in single-precision
+  static expr fdim(float x, float y) {
+#if HALF_ENABLE_CPP11_CMATH
+    return expr(std::fdim(x, y));
+#else
+    return expr((x <= y) ? 0.0f : (x - y));
+#endif
+  }
+
+  /// Fused multiply-add implementation.
+  /// \param x first operand
+  /// \param y second operand
+  /// \param z third operand
+  /// \return \a x * \a y + \a z stored in single-precision
+  static expr fma(float x, float y, float z) {
+#if HALF_ENABLE_CPP11_CMATH && defined(FP_FAST_FMAF)
+    return expr(std::fma(x, y, z));
+#else
+    return expr(x * y + z);
+#endif
+  }
+
+  /// Get NaN.
+  /// \return Half-precision quiet NaN
+  static half nanh() { return half(binary, 0x7FFF); }
+
+  /// Exponential implementation.
+  /// \param arg function argument
+  /// \return function value stored in single-precision
+  static expr exp(float arg) { return expr(std::exp(arg)); }
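+  // [Editor's note] Illustrative sketch, not part of the original header:
+  // the pre-C++11 remainder() fallback reduces |x| modulo 2|y| and then
+  // recenters the result into [-|y|/2, |y|/2], matching IEEE remainder for
+  // half-range inputs:
+  //
+  //   functions::remainder(5.0f, 2.0f);  // expr(1.0f), since 5 == 2*2 + 1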
+  /// Exponential implementation.
+  /// \param arg function argument
+  /// \return function value stored in single-precision
+  static expr expm1(float arg) {
+#if HALF_ENABLE_CPP11_CMATH
+    return expr(std::expm1(arg));
+#else
+    return expr(static_cast<float>(std::exp(static_cast<double>(arg)) - 1.0));
+#endif
+  }
+
+  /// Binary exponential implementation.
+  /// \param arg function argument
+  /// \return function value stored in single-precision
+  static expr exp2(float arg) {
+#if HALF_ENABLE_CPP11_CMATH
+    return expr(std::exp2(arg));
+#else
+    return expr(static_cast<float>(
+        std::exp(arg * 0.69314718055994530941723212145818)));
+#endif
+  }
+
+  /// Logarithm implementation.
+  /// \param arg function argument
+  /// \return function value stored in single-precision
+  static expr log(float arg) { return expr(std::log(arg)); }
+
+  /// Common logarithm implementation.
+  /// \param arg function argument
+  /// \return function value stored in single-precision
+  static expr log10(float arg) { return expr(std::log10(arg)); }
+
+  /// Logarithm implementation.
+  /// \param arg function argument
+  /// \return function value stored in single-precision
+  static expr log1p(float arg) {
+#if HALF_ENABLE_CPP11_CMATH
+    return expr(std::log1p(arg));
+#else
+    return expr(static_cast<float>(std::log(1.0 + arg)));
+#endif
+  }
+
+  /// Binary logarithm implementation.
+  /// \param arg function argument
+  /// \return function value stored in single-precision
+  static expr log2(float arg) {
+#if HALF_ENABLE_CPP11_CMATH
+    return expr(std::log2(arg));
+#else
+    return expr(static_cast<float>(std::log(static_cast<double>(arg)) *
+                                   1.4426950408889634073599246810019));
+#endif
+  }
+
+  /// Square root implementation.
+  /// \param arg function argument
+  /// \return function value stored in single-precision
+  static expr sqrt(float arg) { return expr(std::sqrt(arg)); }
+
+  /// Cubic root implementation.
+  /// \param arg function argument
+  /// \return function value stored in single-precision
+  static expr cbrt(float arg) {
+#if HALF_ENABLE_CPP11_CMATH
+    return expr(std::cbrt(arg));
+#else
+    if (builtin_isnan(arg) || builtin_isinf(arg))
+      return expr(arg);
+    return expr(builtin_signbit(arg)
+                    ? -static_cast<float>(
+                          std::pow(-static_cast<double>(arg), 1.0 / 3.0))
+                    : static_cast<float>(
+                          std::pow(static_cast<double>(arg), 1.0 / 3.0)));
+#endif
+  }
+
+  /// Hypotenuse implementation.
+  /// \param x first argument
+  /// \param y second argument
+  /// \return function value stored in single-precision
+  static expr hypot(float x, float y) {
+#if HALF_ENABLE_CPP11_CMATH
+    return expr(std::hypot(x, y));
+#else
+    return expr(
+        (builtin_isinf(x) || builtin_isinf(y))
+            ? std::numeric_limits<float>::infinity()
+            : static_cast<float>(std::sqrt(static_cast<double>(x) * x +
+                                           static_cast<double>(y) * y)));
+#endif
+  }
+
+  /// Power implementation.
+  /// \param base value to exponentiate
+  /// \param exp power to exponentiate to
+  /// \return function value stored in single-precision
+  static expr pow(float base, float exp) { return expr(std::pow(base, exp)); }
+
+  /// Sine implementation.
+  /// \param arg function argument
+  /// \return function value stored in single-precision
+  static expr sin(float arg) { return expr(std::sin(arg)); }
+
+  /// Cosine implementation.
+  /// \param arg function argument
+  /// \return function value stored in single-precision
+  static expr cos(float arg) { return expr(std::cos(arg)); }
+
+  /// Tan implementation.
+  /// \param arg function argument
+  /// \return function value stored in single-precision
+  static expr tan(float arg) { return expr(std::tan(arg)); }
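+  // [Editor's note] Illustrative sketch, not part of the original header:
+  // the exp2/log2 fallbacks use the identities 2^x = e^(x ln 2) and
+  // log2(x) = ln(x) / ln 2, with the constants above being ln 2 and 1/ln 2:
+  //
+  //   functions::exp2(3.0f);   // expr(8.0f) up to rounding
+  //   functions::log2(8.0f);   // expr(3.0f) up to rounding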
+
+  /// Arc sine implementation.
+  /// \param arg function argument
+  /// \return function value stored in single-precision
+  static expr asin(float arg) { return expr(std::asin(arg)); }
+
+  /// Arc cosine implementation.
+  /// \param arg function argument
+  /// \return function value stored in single-precision
+  static expr acos(float arg) { return expr(std::acos(arg)); }
+
+  /// Arc tangent implementation.
+  /// \param arg function argument
+  /// \return function value stored in single-precision
+  static expr atan(float arg) { return expr(std::atan(arg)); }
+
+  /// Arc tangent implementation.
+  /// \param x first argument
+  /// \param y second argument
+  /// \return function value stored in single-precision
+  static expr atan2(float x, float y) { return expr(std::atan2(x, y)); }
+
+  /// Hyperbolic sine implementation.
+  /// \param arg function argument
+  /// \return function value stored in single-precision
+  static expr sinh(float arg) { return expr(std::sinh(arg)); }
+
+  /// Hyperbolic cosine implementation.
+  /// \param arg function argument
+  /// \return function value stored in single-precision
+  static expr cosh(float arg) { return expr(std::cosh(arg)); }
+
+  /// Hyperbolic tangent implementation.
+  /// \param arg function argument
+  /// \return function value stored in single-precision
+  static expr tanh(float arg) { return expr(std::tanh(arg)); }
+
+  /// Hyperbolic area sine implementation.
+  /// \param arg function argument
+  /// \return function value stored in single-precision
+  static expr asinh(float arg) {
+#if HALF_ENABLE_CPP11_CMATH
+    return expr(std::asinh(arg));
+#else
+    return expr(
+        (arg == -std::numeric_limits<float>::infinity())
+            ? arg
+            : static_cast<float>(std::log(arg + std::sqrt(arg * arg + 1.0))));
+#endif
+  }
+
+  /// Hyperbolic area cosine implementation.
+  /// \param arg function argument
+  /// \return function value stored in single-precision
+  static expr acosh(float arg) {
+#if HALF_ENABLE_CPP11_CMATH
+    return expr(std::acosh(arg));
+#else
+    return expr((arg < -1.0f)
+                    ? std::numeric_limits<float>::quiet_NaN()
+                    : static_cast<float>(
+                          std::log(arg + std::sqrt(arg * arg - 1.0))));
+#endif
+  }
+
+  /// Hyperbolic area tangent implementation.
+  /// \param arg function argument
+  /// \return function value stored in single-precision
+  static expr atanh(float arg) {
+#if HALF_ENABLE_CPP11_CMATH
+    return expr(std::atanh(arg));
+#else
+    return expr(
+        static_cast<float>(0.5 * std::log((1.0 + arg) / (1.0 - arg))));
+#endif
+  }
+
+  /// Error function implementation.
+  /// \param arg function argument
+  /// \return function value stored in single-precision
+  static expr erf(float arg) {
+#if HALF_ENABLE_CPP11_CMATH
+    return expr(std::erf(arg));
+#else
+    return expr(static_cast<float>(erf(static_cast<double>(arg))));
+#endif
+  }
+
+  /// Complementary error function implementation.
+  /// \param arg function argument
+  /// \return function value stored in single-precision
+  static expr erfc(float arg) {
+#if HALF_ENABLE_CPP11_CMATH
+    return expr(std::erfc(arg));
+#else
+    return expr(static_cast<float>(1.0 - erf(static_cast<double>(arg))));
+#endif
+  }
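+
+  // Context for the erf/erfc fallbacks (hedged note; see the private
+  // erf(double) helper further below): they use Winitzki's approximation
+  //
+  //   erf(x) ~ sgn(x) * sqrt(1 - exp(-x^2 * (4/pi + a x^2) / (1 + a x^2)))
+  //
+  // with a = 0.147; the constant 1.2732395447... in that helper is 4/pi.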
+
+  /// Gamma logarithm implementation.
+  /// \param arg function argument
+  /// \return function value stored in single-precision
+  static expr lgamma(float arg) {
+#if HALF_ENABLE_CPP11_CMATH
+    return expr(std::lgamma(arg));
+#else
+    if (builtin_isinf(arg))
+      return expr(std::numeric_limits<float>::infinity());
+    if (arg < 0.0f) {
+      float i, f = std::modf(-arg, &i);
+      if (f == 0.0f)
+        return expr(std::numeric_limits<float>::infinity());
+      return expr(static_cast<float>(
+          1.1447298858494001741434273513531 -
+          std::log(std::abs(std::sin(3.1415926535897932384626433832795 * f))) -
+          lgamma(1.0 - arg)));
+    }
+    return expr(static_cast<float>(lgamma(static_cast<double>(arg))));
+#endif
+  }
+
+  /// Gamma implementation.
+  /// \param arg function argument
+  /// \return function value stored in single-precision
+  static expr tgamma(float arg) {
+#if HALF_ENABLE_CPP11_CMATH
+    return expr(std::tgamma(arg));
+#else
+    if (arg == 0.0f)
+      return builtin_signbit(arg)
+                 ? expr(-std::numeric_limits<float>::infinity())
+                 : expr(std::numeric_limits<float>::infinity());
+    if (arg < 0.0f) {
+      float i, f = std::modf(-arg, &i);
+      if (f == 0.0f)
+        return expr(std::numeric_limits<float>::quiet_NaN());
+      double value = 3.1415926535897932384626433832795 /
+                     (std::sin(3.1415926535897932384626433832795 * f) *
+                      std::exp(lgamma(1.0 - arg)));
+      return expr(
+          static_cast<float>((std::fmod(i, 2.0f) == 0.0f) ? -value : value));
+    }
+    if (builtin_isinf(arg))
+      return expr(arg);
+    return expr(
+        static_cast<float>(std::exp(lgamma(static_cast<double>(arg)))));
+#endif
+  }
+
+  /// Floor implementation.
+  /// \param arg value to round
+  /// \return rounded value
+  static half floor(half arg) {
+    return half(binary, round_half<std::round_toward_neg_infinity>(arg.data_));
+  }
+
+  /// Ceiling implementation.
+  /// \param arg value to round
+  /// \return rounded value
+  static half ceil(half arg) {
+    return half(binary, round_half<std::round_toward_infinity>(arg.data_));
+  }
+
+  /// Truncation implementation.
+  /// \param arg value to round
+  /// \return rounded value
+  static half trunc(half arg) {
+    return half(binary, round_half<std::round_toward_zero>(arg.data_));
+  }
+
+  /// Nearest integer implementation.
+  /// \param arg value to round
+  /// \return rounded value
+  static half round(half arg) { return half(binary, round_half_up(arg.data_)); }
+
+  /// Nearest integer implementation.
+  /// \param arg value to round
+  /// \return rounded value
+  static long lround(half arg) { return detail::half2int_up<long>(arg.data_); }
+
+  /// Nearest integer implementation.
+  /// \param arg value to round
+  /// \return rounded value
+  static half rint(half arg) {
+    return half(binary, round_half<half::round_style>(arg.data_));
+  }
+
+  /// Nearest integer implementation.
+  /// \param arg value to round
+  /// \return rounded value
+  static long lrint(half arg) {
+    return detail::half2int<half::round_style, long>(arg.data_);
+  }
+
+#if HALF_ENABLE_CPP11_LONG_LONG
+  /// Nearest integer implementation.
+  /// \param arg value to round
+  /// \return rounded value
+  static long long llround(half arg) {
+    return detail::half2int_up<long long>(arg.data_);
+  }
+
+  /// Nearest integer implementation.
+  /// \param arg value to round
+  /// \return rounded value
+  static long long llrint(half arg) {
+    return detail::half2int<half::round_style, long long>(arg.data_);
+  }
+#endif
+
+  /// Decompression implementation.
+  /// \param arg number to decompress
+  /// \param exp address to store exponent at
+  /// \return normalized significand
+  static half frexp(half arg, int* exp) {
+    int m = arg.data_ & 0x7FFF, e = -14;
+    if (m >= 0x7C00 || !m)
+      return *exp = 0, arg;
+    for (; m < 0x400; m <<= 1, --e)
+      ;
+    return *exp = e + (m >> 10),
+           half(binary, (arg.data_ & 0x8000) | 0x3800 | (m & 0x3FF));
+  }
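+
+  // A hedged example of the bit trick in frexp() above: 0x3800 is the biased
+  // exponent field of 0.5 in IEEE 754 binary16, so re-tagging the normalized
+  // mantissa with it yields a significand in [0.5, 1):
+  //
+  //   int e;
+  //   half m = frexp(half(12.0f), &e);
+  //   // m == 0.75, e == 4, and 0.75 * 2^4 == 12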
+
+  /// Decompression implementation.
+  /// \param arg number to decompress
+  /// \param iptr address to store integer part at
+  /// \return fractional part
+  static half modf(half arg, half* iptr) {
+    uint32_t e = arg.data_ & 0x7FFF;
+    if (e >= 0x6400)
+      return *iptr = arg, half(binary, arg.data_ & (0x8000U | -(e > 0x7C00)));
+    if (e < 0x3C00)
+      return iptr->data_ = arg.data_ & 0x8000, arg;
+    e >>= 10;
+    uint32_t mask = (1 << (25 - e)) - 1, m = arg.data_ & mask;
+    iptr->data_ = arg.data_ & ~mask;
+    if (!m)
+      return half(binary, arg.data_ & 0x8000);
+    for (; m < 0x400; m <<= 1, --e)
+      ;
+    return half(binary, static_cast<uint16>((arg.data_ & 0x8000) | (e << 10) |
+                                            (m & 0x3FF)));
+  }
+
+  /// Scaling implementation.
+  /// \param arg number to scale
+  /// \param exp power of two to scale by
+  /// \return scaled number
+  static half scalbln(half arg, long exp) {
+    uint32_t m = arg.data_ & 0x7FFF;
+    if (m >= 0x7C00 || !m)
+      return arg;
+    for (; m < 0x400; m <<= 1, --exp)
+      ;
+    exp += m >> 10;
+    uint16 value = arg.data_ & 0x8000;
+    if (exp > 30) {
+      if (half::round_style == std::round_toward_zero)
+        value |= 0x7BFF;
+      else if (half::round_style == std::round_toward_infinity)
+        value |= 0x7C00 - (value >> 15);
+      else if (half::round_style == std::round_toward_neg_infinity)
+        value |= 0x7BFF + (value >> 15);
+      else
+        value |= 0x7C00;
+    } else if (exp > 0)
+      value |= (exp << 10) | (m & 0x3FF);
+    else if (exp > -11) {
+      m = (m & 0x3FF) | 0x400;
+      if (half::round_style == std::round_to_nearest) {
+        m += 1 << -exp;
+#if HALF_ROUND_TIES_TO_EVEN
+        m -= (m >> (1 - exp)) & 1;
+#endif
+      } else if (half::round_style == std::round_toward_infinity)
+        m += ((value >> 15) - 1) & ((1 << (1 - exp)) - 1U);
+      else if (half::round_style == std::round_toward_neg_infinity)
+        m += -(value >> 15) & ((1 << (1 - exp)) - 1U);
+      value |= m >> (1 - exp);
+    } else if (half::round_style == std::round_toward_infinity)
+      value -= (value >> 15) - 1;
+    else if (half::round_style == std::round_toward_neg_infinity)
+      value += value >> 15;
+    return half(binary, value);
+  }
+
+  /// Exponent implementation.
+  /// \param arg number to query
+  /// \return floating point exponent
+  static int ilogb(half arg) {
+    int abs = arg.data_ & 0x7FFF;
+    if (!abs)
+      return FP_ILOGB0;
+    if (abs < 0x7C00) {
+      int exp = (abs >> 10) - 15;
+      if (abs < 0x400)
+        for (; abs < 0x200; abs <<= 1, --exp)
+          ;
+      return exp;
+    }
+    if (abs > 0x7C00)
+      return FP_ILOGBNAN;
+    return INT_MAX;
+  }
+
+  /// Exponent implementation.
+  /// \param arg number to query
+  /// \return floating point exponent
+  static half logb(half arg) {
+    int abs = arg.data_ & 0x7FFF;
+    if (!abs)
+      return half(binary, 0xFC00);
+    if (abs < 0x7C00) {
+      int exp = (abs >> 10) - 15;
+      if (abs < 0x400)
+        for (; abs < 0x200; abs <<= 1, --exp)
+          ;
+      uint16 bits = (exp < 0) << 15;
+      if (exp) {
+        uint32_t m = std::abs(exp) << 6, e = 18;
+        for (; m < 0x400; m <<= 1, --e)
+          ;
+        bits |= (e << 10) + m;
+      }
+      return half(binary, bits);
+    }
+    if (abs > 0x7C00)
+      return arg;
+    return half(binary, 0x7C00);
+  }
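+
+  // Illustrative note on ilogb()/logb() above: the exponent is the raw
+  // biased exponent field minus the binary16 bias of 15, with an extra
+  // normalization loop for subnormals. For example:
+  //
+  //   // 1.0 has bits 0x3C00: (0x3C00 >> 10) - 15 == 0, so ilogb(1.0) == 0
+  //   // 2.0 has bits 0x4000: (0x4000 >> 10) - 15 == 1, so ilogb(2.0) == 1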
+
+  /// Enumeration implementation.
+  /// \param from number to increase/decrease
+  /// \param to direction to enumerate into
+  /// \return next representable number
+  static half nextafter(half from, half to) {
+    uint16 fabs = from.data_ & 0x7FFF, tabs = to.data_ & 0x7FFF;
+    if (fabs > 0x7C00)
+      return from;
+    if (tabs > 0x7C00 || from.data_ == to.data_ || !(fabs | tabs))
+      return to;
+    if (!fabs)
+      return half(binary, (to.data_ & 0x8000) + 1);
+    bool lt = ((fabs == from.data_) ? static_cast<int>(fabs)
+                                    : -static_cast<int>(fabs)) <
+              ((tabs == to.data_) ? static_cast<int>(tabs)
+                                  : -static_cast<int>(tabs));
+    return half(binary,
+                from.data_ +
+                    (((from.data_ >> 15) ^ static_cast<uint16>(lt)) << 1) - 1);
+  }
+
+  /// Enumeration implementation.
+  /// \param from number to increase/decrease
+  /// \param to direction to enumerate into
+  /// \return next representable number
+  static half nexttoward(half from, long double to) {
+    if (isnan(from))
+      return from;
+    long double lfrom = static_cast<long double>(from);
+    if (builtin_isnan(to) || lfrom == to)
+      return half(static_cast<float>(to));
+    if (!(from.data_ & 0x7FFF))
+      return half(binary,
+                  (static_cast<uint16>(builtin_signbit(to)) << 15) + 1);
+    return half(binary,
+                from.data_ +
+                    (((from.data_ >> 15) ^ static_cast<uint16>(lfrom < to))
+                     << 1) -
+                    1);
+  }
+
+  /// Sign implementation.
+  /// \param x first operand
+  /// \param y second operand
+  /// \return composed value
+  static half copysign(half x, half y) {
+    return half(binary, x.data_ ^ ((x.data_ ^ y.data_) & 0x8000));
+  }
+
+  /// Classification implementation.
+  /// \param arg value to classify
+  /// \return classification code of \a arg
+  static int fpclassify(half arg) {
+    uint32_t abs = arg.data_ & 0x7FFF;
+    return abs ? ((abs > 0x3FF) ? ((abs >= 0x7C00)
+                                       ? ((abs > 0x7C00) ? FP_NAN : FP_INFINITE)
+                                       : FP_NORMAL)
+                                : FP_SUBNORMAL)
+               : FP_ZERO;
+  }
+
+  /// Classification implementation.
+  /// \param arg value to classify
+  /// \retval true if finite number
+  /// \retval false else
+  static bool isfinite(half arg) { return (arg.data_ & 0x7C00) != 0x7C00; }
+
+  /// Classification implementation.
+  /// \param arg value to classify
+  /// \retval true if infinite number
+  /// \retval false else
+  static bool isinf(half arg) { return (arg.data_ & 0x7FFF) == 0x7C00; }
+
+  /// Classification implementation.
+  /// \param arg value to classify
+  /// \retval true if not a number
+  /// \retval false else
+  static bool isnan(half arg) { return (arg.data_ & 0x7FFF) > 0x7C00; }
+
+  /// Classification implementation.
+  /// \param arg value to classify
+  /// \retval true if normal number
+  /// \retval false else
+  static bool isnormal(half arg) {
+    return ((arg.data_ & 0x7C00) != 0) & ((arg.data_ & 0x7C00) != 0x7C00);
+  }
+
+  /// Sign bit implementation.
+  /// \param arg value to check
+  /// \retval true if signed
+  /// \retval false if unsigned
+  static bool signbit(half arg) { return (arg.data_ & 0x8000) != 0; }
+
+  /// Comparison implementation.
+  /// \param x first operand
+  /// \param y second operand
+  /// \retval true if operands equal
+  /// \retval false else
+  static bool isequal(half x, half y) {
+    return (x.data_ == y.data_ || !((x.data_ | y.data_) & 0x7FFF)) && !isnan(x);
+  }
+
+  /// Comparison implementation.
+  /// \param x first operand
+  /// \param y second operand
+  /// \retval true if operands not equal
+  /// \retval false else
+  static bool isnotequal(half x, half y) {
+    return (x.data_ != y.data_ && ((x.data_ | y.data_) & 0x7FFF)) || isnan(x);
+  }
+
+  /// Comparison implementation.
+  /// \param x first operand
+  /// \param y second operand
+  /// \retval true if \a x > \a y
+  /// \retval false else
+  static bool isgreater(half x, half y) {
+    int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF;
+    return xabs <= 0x7C00 && yabs <= 0x7C00 &&
+           (((xabs == x.data_) ? xabs : -xabs) >
+            ((yabs == y.data_) ? yabs : -yabs));
+  }
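+
+  // The ordered comparisons here rely on a sign-magnitude trick (hedged
+  // sketch; key() is a hypothetical helper): non-negative binary16 patterns
+  // already order like integers and negative ones order in reverse, so
+  //
+  //   int key(int bits) {
+  //     int abs = bits & 0x7FFF;
+  //     return (abs == bits) ? abs : -abs;
+  //   }
+  //
+  // yields integers that compare exactly like the underlying half values.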
+
+  /// Comparison implementation.
+  /// \param x first operand
+  /// \param y second operand
+  /// \retval true if \a x >= \a y
+  /// \retval false else
+  static bool isgreaterequal(half x, half y) {
+    int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF;
+    return xabs <= 0x7C00 && yabs <= 0x7C00 &&
+           (((xabs == x.data_) ? xabs : -xabs) >=
+            ((yabs == y.data_) ? yabs : -yabs));
+  }
+
+  /// Comparison implementation.
+  /// \param x first operand
+  /// \param y second operand
+  /// \retval true if \a x < \a y
+  /// \retval false else
+  static bool isless(half x, half y) {
+    int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF;
+    return xabs <= 0x7C00 && yabs <= 0x7C00 &&
+           (((xabs == x.data_) ? xabs : -xabs) <
+            ((yabs == y.data_) ? yabs : -yabs));
+  }
+
+  /// Comparison implementation.
+  /// \param x first operand
+  /// \param y second operand
+  /// \retval true if \a x <= \a y
+  /// \retval false else
+  static bool islessequal(half x, half y) {
+    int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF;
+    return xabs <= 0x7C00 && yabs <= 0x7C00 &&
+           (((xabs == x.data_) ? xabs : -xabs) <=
+            ((yabs == y.data_) ? yabs : -yabs));
+  }
+
+  /// Comparison implementation.
+  /// \param x first operand
+  /// \param y second operand
+  /// \retval true if either \a x > \a y or \a x < \a y
+  /// \retval false else
+  static bool islessgreater(half x, half y) {
+    int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF;
+    if (xabs > 0x7C00 || yabs > 0x7C00)
+      return false;
+    int a = (xabs == x.data_) ? xabs : -xabs,
+        b = (yabs == y.data_) ? yabs : -yabs;
+    return a < b || a > b;
+  }
+
+  /// Comparison implementation.
+  /// \param x first operand
+  /// \param y second operand
+  /// \retval true if operands unordered
+  /// \retval false else
+  static bool isunordered(half x, half y) { return isnan(x) || isnan(y); }
+
+ private:
+  static double erf(double arg) {
+    if (builtin_isinf(arg))
+      return (arg < 0.0) ? -1.0 : 1.0;
+    double x2 = arg * arg, ax2 = 0.147 * x2,
+           value = std::sqrt(
+               1.0 - std::exp(-x2 * (1.2732395447351626861510701069801 + ax2) /
+                              (1.0 + ax2)));
+    return builtin_signbit(arg) ? -value : value;
+  }
+
+  static double lgamma(double arg) {
+    double v = 1.0;
+    for (; arg < 8.0; ++arg)
+      v *= arg;
+    double w = 1.0 / (arg * arg);
+    return (((((((-0.02955065359477124183006535947712 * w +
+                  0.00641025641025641025641025641026) *
+                     w +
+                 -0.00191752691752691752691752691753) *
+                    w +
+                8.4175084175084175084175084175084e-4) *
+                   w +
+               -5.952380952380952380952380952381e-4) *
+                  w +
+              7.9365079365079365079365079365079e-4) *
+                 w +
+             -0.00277777777777777777777777777778) *
+                w +
+            0.08333333333333333333333333333333) /
+               arg +
+           0.91893853320467274178032973640562 - std::log(v) - arg +
+           (arg - 0.5) * std::log(arg);
+  }
+};
+
+/// Wrapper for unary half-precision functions needing specialization for
+/// individual argument types.
+/// \tparam T argument type
+template <typename T> struct unary_specialized {
+  /// Negation implementation.
+  /// \param arg value to negate
+  /// \return negated value
+  static HALF_CONSTEXPR half negate(half arg) {
+    return half(binary, arg.data_ ^ 0x8000);
+  }
+
+  /// Absolute value implementation.
+  /// \param arg function argument
+  /// \return absolute value
+  static half fabs(half arg) { return half(binary, arg.data_ & 0x7FFF); }
+};
+template <> struct unary_specialized<expr> {
+  static HALF_CONSTEXPR expr negate(float arg) { return expr(-arg); }
+  static expr fabs(float arg) { return expr(std::fabs(arg)); }
+};
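+
+// Hedged note on the private lgamma(double) helper above: it shifts the
+// argument up past 8 (accumulating the product v), then evaluates Stirling's
+// series
+//
+//   ln G(x) ~ (x - 1/2) ln x - x + ln(2 pi)/2 + 1/(12 x) - 1/(360 x^3) + ...
+//
+// where 0.91893853320467... is ln(2 pi)/2 and 0.08333... = 1/12 is the
+// first series coefficient.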
+
+/// Wrapper for binary half-precision functions needing specialization for
+/// individual argument types.
+/// \tparam T first argument type
+/// \tparam U second argument type
+template <typename T, typename U> struct binary_specialized {
+  /// Minimum implementation.
+  /// \param x first operand
+  /// \param y second operand
+  /// \return minimum value
+  static expr fmin(float x, float y) {
+#if HALF_ENABLE_CPP11_CMATH
+    return expr(std::fmin(x, y));
+#else
+    if (builtin_isnan(x))
+      return expr(y);
+    if (builtin_isnan(y))
+      return expr(x);
+    return expr(std::min(x, y));
+#endif
+  }
+
+  /// Maximum implementation.
+  /// \param x first operand
+  /// \param y second operand
+  /// \return maximum value
+  static expr fmax(float x, float y) {
+#if HALF_ENABLE_CPP11_CMATH
+    return expr(std::fmax(x, y));
+#else
+    if (builtin_isnan(x))
+      return expr(y);
+    if (builtin_isnan(y))
+      return expr(x);
+    return expr(std::max(x, y));
+#endif
+  }
+};
+template <> struct binary_specialized<half, half> {
+  static half fmin(half x, half y) {
+    int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF;
+    if (xabs > 0x7C00)
+      return y;
+    if (yabs > 0x7C00)
+      return x;
+    return (((xabs == x.data_) ? xabs : -xabs) >
+            ((yabs == y.data_) ? yabs : -yabs))
+               ? y
+               : x;
+  }
+  static half fmax(half x, half y) {
+    int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF;
+    if (xabs > 0x7C00)
+      return y;
+    if (yabs > 0x7C00)
+      return x;
+    return (((xabs == x.data_) ? xabs : -xabs) <
+            ((yabs == y.data_) ? yabs : -yabs))
+               ? y
+               : x;
+  }
+};
+
+/// Helper class for half casts.
+/// This class template has to be specialized for all valid cast argument and
+/// return type combinations to define an appropriate static `cast` member
+/// function and a corresponding `type` member denoting its return type.
+/// \tparam T destination type
+/// \tparam U source type
+/// \tparam R rounding mode to use
+template <typename T, typename U,
+          std::float_round_style R = (std::float_round_style)(HALF_ROUND_STYLE)>
+struct half_caster {};
+template <typename U, std::float_round_style R> struct half_caster<half, U, R> {
+#if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS
+  static_assert(std::is_arithmetic<U>::value,
+                "half_cast from non-arithmetic type unsupported");
+#endif
+
+  static half cast(U arg) { return cast_impl(arg, is_float<U>()); };
+
+ private:
+  static half cast_impl(U arg, true_type) {
+    return half(binary, float2half<R>(arg));
+  }
+  static half cast_impl(U arg, false_type) {
+    return half(binary, int2half<R>(arg));
+  }
+};
+template <typename T, std::float_round_style R> struct half_caster<T, half, R> {
+#if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS
+  static_assert(std::is_arithmetic<T>::value,
+                "half_cast to non-arithmetic type unsupported");
+#endif
+
+  static T cast(half arg) { return cast_impl(arg, is_float<T>()); }
+
+ private:
+  static T cast_impl(half arg, true_type) { return half2float(arg.data_); }
+  static T cast_impl(half arg, false_type) {
+    return half2int<R, T>(arg.data_);
+  }
+};
+template <typename T, std::float_round_style R> struct half_caster<T, expr, R> {
+#if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS
+  static_assert(std::is_arithmetic<T>::value,
+                "half_cast to non-arithmetic type unsupported");
+#endif
+
+  static T cast(expr arg) { return cast_impl(arg, is_float<T>()); }
+
+ private:
+  static T cast_impl(float arg, true_type) { return static_cast<T>(arg); }
+  static T cast_impl(half arg, false_type) {
+    return half2int<R, T>(arg.data_);
+  }
+};
+template <std::float_round_style R> struct half_caster<half, half, R> {
+  static half cast(half arg) { return arg; }
+};
+template <std::float_round_style R>
+struct half_caster<half, expr, R> : half_caster<half, half, R> {};
+
+/// \name Comparison operators
+/// \{
+
+/// Comparison for equality.
+/// \param x first operand
+/// \param y second operand
+/// \retval true if operands equal
+/// \retval false else
+template <typename T, typename U>
+typename enable<bool, T, U>::type operator==(T x, U y) {
+  return functions::isequal(x, y);
+}
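+
+// Hedged note on the operators in this section: enable<bool, T, U>::type is
+// this header's hand-rolled enable_if -- it names `bool` only when T and U
+// are half or expr, so these templates drop out of overload resolution for
+// all other types (classic SFINAE), e.g.
+//
+//   // half == half -> enable<bool, half, half>::type is bool: viable
+//   // half == int  -> no ::type member: silently removed from the set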
+
+/// Comparison for inequality.
+/// \param x first operand
+/// \param y second operand
+/// \retval true if operands not equal
+/// \retval false else
+template <typename T, typename U>
+typename enable<bool, T, U>::type operator!=(T x, U y) {
+  return functions::isnotequal(x, y);
+}
+
+/// Comparison for less than.
+/// \param x first operand
+/// \param y second operand
+/// \retval true if \a x less than \a y
+/// \retval false else
+template <typename T, typename U>
+typename enable<bool, T, U>::type operator<(T x, U y) {
+  return functions::isless(x, y);
+}
+
+/// Comparison for greater than.
+/// \param x first operand
+/// \param y second operand
+/// \retval true if \a x greater than \a y
+/// \retval false else
+template <typename T, typename U>
+typename enable<bool, T, U>::type operator>(T x, U y) {
+  return functions::isgreater(x, y);
+}
+
+/// Comparison for less equal.
+/// \param x first operand
+/// \param y second operand
+/// \retval true if \a x less equal \a y
+/// \retval false else
+template <typename T, typename U>
+typename enable<bool, T, U>::type operator<=(T x, U y) {
+  return functions::islessequal(x, y);
+}
+
+/// Comparison for greater equal.
+/// \param x first operand
+/// \param y second operand
+/// \retval true if \a x greater equal \a y
+/// \retval false else
+template <typename T, typename U>
+typename enable<bool, T, U>::type operator>=(T x, U y) {
+  return functions::isgreaterequal(x, y);
+}
+
+/// \}
+/// \name Arithmetic operators
+/// \{
+
+/// Add halves.
+/// \param x left operand
+/// \param y right operand
+/// \return sum of half expressions
+template <typename T, typename U>
+typename enable<expr, T, U>::type operator+(T x, U y) {
+  return functions::plus(x, y);
+}
+
+/// Subtract halves.
+/// \param x left operand
+/// \param y right operand
+/// \return difference of half expressions
+template <typename T, typename U>
+typename enable<expr, T, U>::type operator-(T x, U y) {
+  return functions::minus(x, y);
+}
+
+/// Multiply halves.
+/// \param x left operand
+/// \param y right operand
+/// \return product of half expressions
+template <typename T, typename U>
+typename enable<expr, T, U>::type operator*(T x, U y) {
+  return functions::multiplies(x, y);
+}
+
+/// Divide halves.
+/// \param x left operand
+/// \param y right operand
+/// \return quotient of half expressions
+template <typename T, typename U>
+typename enable<expr, T, U>::type operator/(T x, U y) {
+  return functions::divides(x, y);
+}
+
+/// Identity.
+/// \param arg operand
+/// \return unchanged operand
+template <typename T>
+HALF_CONSTEXPR typename enable<T, T>::type operator+(T arg) {
+  return arg;
+}
+
+/// Negation.
+/// \param arg operand
+/// \return negated operand
+template <typename T>
+HALF_CONSTEXPR typename enable<T, T>::type operator-(T arg) {
+  return unary_specialized<T>::negate(arg);
+}
+
+/// \}
+/// \name Input and output
+/// \{
+
+/// Output operator.
+/// \param out output stream to write into
+/// \param arg half expression to write
+/// \return reference to output stream
+template <typename charT, typename traits, typename T>
+typename enable<std::basic_ostream<charT, traits>&, T>::type
+operator<<(std::basic_ostream<charT, traits>& out, T arg) {
+  return functions::write(out, arg);
+}
+
+/// Input operator.
+/// \param in input stream to read from
+/// \param arg half to read into
+/// \return reference to input stream
+template <typename charT, typename traits>
+std::basic_istream<charT, traits>&
+operator>>(std::basic_istream<charT, traits>& in, half& arg) {
+  return functions::read(in, arg);
+}
+
+/// \}
+/// \name Basic mathematical operations
+/// \{
+
+/// Absolute value.
+/// \param arg operand
+/// \return absolute value of \a arg
+// template <typename T> typename enable<T, T>::type abs(T arg) {
+//   return unary_specialized<T>::fabs(arg); }
+inline half abs(half arg) { return unary_specialized<half>::fabs(arg); }
+inline expr abs(expr arg) { return unary_specialized<expr>::fabs(arg); }
+
+/// Absolute value.
+/// \param arg operand
+/// \return absolute value of \a arg
+// template <typename T> typename enable<T, T>::type fabs(T arg) {
+//   return unary_specialized<T>::fabs(arg); }
+inline half fabs(half arg) { return unary_specialized<half>::fabs(arg); }
+inline expr fabs(expr arg) { return unary_specialized<expr>::fabs(arg); }
+
+/// Remainder of division.
+/// \param x first operand
+/// \param y second operand
+/// \return remainder of floating point division.
+// template <typename T, typename U> typename enable<expr, T, U>::type
+// fmod(T x, U y) { return functions::fmod(x, y); }
+inline expr fmod(half x, half y) { return functions::fmod(x, y); }
+inline expr fmod(half x, expr y) { return functions::fmod(x, y); }
+inline expr fmod(expr x, half y) { return functions::fmod(x, y); }
+inline expr fmod(expr x, expr y) { return functions::fmod(x, y); }
+
+/// Remainder of division.
+/// \param x first operand
+/// \param y second operand
+/// \return remainder of floating point division.
+// template <typename T, typename U> typename enable<expr, T, U>::type
+// remainder(T x, U y) { return functions::remainder(x, y); }
+inline expr remainder(half x, half y) { return functions::remainder(x, y); }
+inline expr remainder(half x, expr y) { return functions::remainder(x, y); }
+inline expr remainder(expr x, half y) { return functions::remainder(x, y); }
+inline expr remainder(expr x, expr y) { return functions::remainder(x, y); }
+
+/// Remainder of division.
+/// \param x first operand
+/// \param y second operand
+/// \param quo address to store some bits of quotient at
+/// \return remainder of floating point division.
+// template <typename T, typename U> typename enable<expr, T, U>::type
+// remquo(T x, U y, int* quo) { return functions::remquo(x, y, quo); }
+inline expr remquo(half x, half y, int* quo) {
+  return functions::remquo(x, y, quo);
+}
+inline expr remquo(half x, expr y, int* quo) {
+  return functions::remquo(x, y, quo);
+}
+inline expr remquo(expr x, half y, int* quo) {
+  return functions::remquo(x, y, quo);
+}
+inline expr remquo(expr x, expr y, int* quo) {
+  return functions::remquo(x, y, quo);
+}
+
+/// Fused multiply add.
+/// \param x first operand
+/// \param y second operand
+/// \param z third operand
+/// \return ( \a x * \a y ) + \a z rounded as one operation.
+// template <typename T, typename U, typename V>
+// typename enable<expr, T, U, V>::type fma(T x, U y, V z) {
+//   return functions::fma(x, y, z); }
+inline expr fma(half x, half y, half z) { return functions::fma(x, y, z); }
+inline expr fma(half x, half y, expr z) { return functions::fma(x, y, z); }
+inline expr fma(half x, expr y, half z) { return functions::fma(x, y, z); }
+inline expr fma(half x, expr y, expr z) { return functions::fma(x, y, z); }
+inline expr fma(expr x, half y, half z) { return functions::fma(x, y, z); }
+inline expr fma(expr x, half y, expr z) { return functions::fma(x, y, z); }
+inline expr fma(expr x, expr y, half z) { return functions::fma(x, y, z); }
+inline expr fma(expr x, expr y, expr z) { return functions::fma(x, y, z); }
+
+/// Maximum of half expressions.
+/// \param x first operand
+/// \param y second operand
+/// \return maximum of operands
+// template <typename T, typename U> typename result<T, U>::type
+// fmax(T x, U y) { return binary_specialized<T, U>::fmax(x, y); }
+inline half fmax(half x, half y) {
+  return binary_specialized<half, half>::fmax(x, y);
+}
+inline expr fmax(half x, expr y) {
+  return binary_specialized<half, expr>::fmax(x, y);
+}
+inline expr fmax(expr x, half y) {
+  return binary_specialized<expr, half>::fmax(x, y);
+}
+inline expr fmax(expr x, expr y) {
+  return binary_specialized<expr, expr>::fmax(x, y);
+}
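+
+// Hedged behavioral note on fmax above and fmin below: like std::fmax and
+// std::fmin, when exactly one operand is NaN the other operand is returned:
+//
+//   half a = nanh("");                     // quiet NaN
+//   half b(2.0f);
+//   // fmax(a, b) == 2.0 and fmin(b, a) == 2.0; only fmax(a, a) stays NaN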
+
+/// Minimum of half expressions.
+/// \param x first operand
+/// \param y second operand
+/// \return minimum of operands
+// template <typename T, typename U> typename result<T, U>::type
+// fmin(T x, U y) { return binary_specialized<T, U>::fmin(x, y); }
+inline half fmin(half x, half y) {
+  return binary_specialized<half, half>::fmin(x, y);
+}
+inline expr fmin(half x, expr y) {
+  return binary_specialized<half, expr>::fmin(x, y);
+}
+inline expr fmin(expr x, half y) {
+  return binary_specialized<expr, half>::fmin(x, y);
+}
+inline expr fmin(expr x, expr y) {
+  return binary_specialized<expr, expr>::fmin(x, y);
+}
+
+/// Positive difference.
+/// \param x first operand
+/// \param y second operand
+/// \return \a x - \a y or 0 if difference negative
+// template <typename T, typename U> typename enable<expr, T, U>::type
+// fdim(T x, U y) { return functions::fdim(x, y); }
+inline expr fdim(half x, half y) { return functions::fdim(x, y); }
+inline expr fdim(half x, expr y) { return functions::fdim(x, y); }
+inline expr fdim(expr x, half y) { return functions::fdim(x, y); }
+inline expr fdim(expr x, expr y) { return functions::fdim(x, y); }
+
+/// Get NaN value.
+/// \return quiet NaN
+inline half nanh(const char*) { return functions::nanh(); }
+
+/// \}
+/// \name Exponential functions
+/// \{
+
+/// Exponential function.
+/// \param arg function argument
+/// \return e raised to \a arg
+// template <typename T> typename enable<expr, T>::type exp(T arg) {
+//   return functions::exp(arg); }
+inline expr exp(half arg) { return functions::exp(arg); }
+inline expr exp(expr arg) { return functions::exp(arg); }
+
+/// Exponential minus one.
+/// \param arg function argument
+/// \return e raised to \a arg subtracted by 1
+// template <typename T> typename enable<expr, T>::type expm1(T arg) {
+//   return functions::expm1(arg); }
+inline expr expm1(half arg) { return functions::expm1(arg); }
+inline expr expm1(expr arg) { return functions::expm1(arg); }
+
+/// Binary exponential.
+/// \param arg function argument
+/// \return 2 raised to \a arg
+// template <typename T> typename enable<expr, T>::type exp2(T arg) {
+//   return functions::exp2(arg); }
+inline expr exp2(half arg) { return functions::exp2(arg); }
+inline expr exp2(expr arg) { return functions::exp2(arg); }
+
+/// Natural logarithm.
+/// \param arg function argument
+/// \return logarithm of \a arg to base e
+// template <typename T> typename enable<expr, T>::type log(T arg) {
+//   return functions::log(arg); }
+inline expr log(half arg) { return functions::log(arg); }
+inline expr log(expr arg) { return functions::log(arg); }
+
+/// Common logarithm.
+/// \param arg function argument
+/// \return logarithm of \a arg to base 10
+// template <typename T> typename enable<expr, T>::type log10(T arg) {
+//   return functions::log10(arg); }
+inline expr log10(half arg) { return functions::log10(arg); }
+inline expr log10(expr arg) { return functions::log10(arg); }
+
+/// Natural logarithm.
+/// \param arg function argument
+/// \return logarithm of \a arg plus 1 to base e
+// template <typename T> typename enable<expr, T>::type log1p(T arg) {
+//   return functions::log1p(arg); }
+inline expr log1p(half arg) { return functions::log1p(arg); }
+inline expr log1p(expr arg) { return functions::log1p(arg); }
+
+/// Binary logarithm.
+/// \param arg function argument
+/// \return logarithm of \a arg to base 2
+// template <typename T> typename enable<expr, T>::type log2(T arg) {
+//   return functions::log2(arg); }
+inline expr log2(half arg) { return functions::log2(arg); }
+inline expr log2(expr arg) { return functions::log2(arg); }
+
+/// \}
+/// \name Power functions
+/// \{
+
+/// Square root.
+/// \param arg function argument
+/// \return square root of \a arg
+// template <typename T> typename enable<expr, T>::type sqrt(T arg) {
+//   return functions::sqrt(arg); }
+inline expr sqrt(half arg) { return functions::sqrt(arg); }
+inline expr sqrt(expr arg) { return functions::sqrt(arg); }
+
+/// Cubic root.
+/// \param arg function argument
+/// \return cubic root of \a arg
+// template <typename T> typename enable<expr, T>::type cbrt(T arg) {
+//   return functions::cbrt(arg); }
+inline expr cbrt(half arg) { return functions::cbrt(arg); }
+inline expr cbrt(expr arg) { return functions::cbrt(arg); }
+
+/// Hypotenuse function.
+/// \param x first argument
+/// \param y second argument
+/// \return square root of sum of squares without internal over- or underflows
+// template <typename T, typename U> typename enable<expr, T, U>::type
+// hypot(T x, U y) { return functions::hypot(x, y); }
+inline expr hypot(half x, half y) { return functions::hypot(x, y); }
+inline expr hypot(half x, expr y) { return functions::hypot(x, y); }
+inline expr hypot(expr x, half y) { return functions::hypot(x, y); }
+inline expr hypot(expr x, expr y) { return functions::hypot(x, y); }
+
+/// Power function.
+/// \param base first argument
+/// \param exp second argument
+/// \return \a base raised to \a exp
+// template <typename T, typename U> typename enable<expr, T, U>::type
+// pow(T base, U exp) { return functions::pow(base, exp); }
+inline expr pow(half base, half exp) { return functions::pow(base, exp); }
+inline expr pow(half base, expr exp) { return functions::pow(base, exp); }
+inline expr pow(expr base, half exp) { return functions::pow(base, exp); }
+inline expr pow(expr base, expr exp) { return functions::pow(base, exp); }
+
+/// \}
+/// \name Trigonometric functions
+/// \{
+
+/// Sine function.
+/// \param arg function argument
+/// \return sine value of \a arg
+// template <typename T> typename enable<expr, T>::type sin(T arg) {
+//   return functions::sin(arg); }
+inline expr sin(half arg) { return functions::sin(arg); }
+inline expr sin(expr arg) { return functions::sin(arg); }
+
+/// Cosine function.
+/// \param arg function argument
+/// \return cosine value of \a arg
+// template <typename T> typename enable<expr, T>::type cos(T arg) {
+//   return functions::cos(arg); }
+inline expr cos(half arg) { return functions::cos(arg); }
+inline expr cos(expr arg) { return functions::cos(arg); }
+
+/// Tangent function.
+/// \param arg function argument
+/// \return tangent value of \a arg
+// template <typename T> typename enable<expr, T>::type tan(T arg) {
+//   return functions::tan(arg); }
+inline expr tan(half arg) { return functions::tan(arg); }
+inline expr tan(expr arg) { return functions::tan(arg); }
+
+/// Arc sine.
+/// \param arg function argument
+/// \return arc sine value of \a arg
+// template <typename T> typename enable<expr, T>::type asin(T arg) {
+//   return functions::asin(arg); }
+inline expr asin(half arg) { return functions::asin(arg); }
+inline expr asin(expr arg) { return functions::asin(arg); }
+
+/// Arc cosine function.
+/// \param arg function argument
+/// \return arc cosine value of \a arg
+// template <typename T> typename enable<expr, T>::type acos(T arg) {
+//   return functions::acos(arg); }
+inline expr acos(half arg) { return functions::acos(arg); }
+inline expr acos(expr arg) { return functions::acos(arg); }
+
+/// Arc tangent function.
+/// \param arg function argument
+/// \return arc tangent value of \a arg
+// template <typename T> typename enable<expr, T>::type atan(T arg) {
+//   return functions::atan(arg); }
+inline expr atan(half arg) { return functions::atan(arg); }
+inline expr atan(expr arg) { return functions::atan(arg); }
+
+/// Arc tangent function.
+/// \param x first argument
+/// \param y second argument
+/// \return arc tangent value
+// template <typename T, typename U> typename enable<expr, T, U>::type
+// atan2(T x, U y) { return functions::atan2(x, y); }
+inline expr atan2(half x, half y) { return functions::atan2(x, y); }
+inline expr atan2(half x, expr y) { return functions::atan2(x, y); }
+inline expr atan2(expr x, half y) { return functions::atan2(x, y); }
+inline expr atan2(expr x, expr y) { return functions::atan2(x, y); }
+
+/// \}
+/// \name Hyperbolic functions
+/// \{
+
+/// Hyperbolic sine.
+/// \param arg function argument
+/// \return hyperbolic sine value of \a arg
+// template <typename T> typename enable<expr, T>::type sinh(T arg) {
+//   return functions::sinh(arg); }
+inline expr sinh(half arg) { return functions::sinh(arg); }
+inline expr sinh(expr arg) { return functions::sinh(arg); }
+
+/// Hyperbolic cosine.
+/// \param arg function argument
+/// \return hyperbolic cosine value of \a arg
+// template <typename T> typename enable<expr, T>::type cosh(T arg) {
+//   return functions::cosh(arg); }
+inline expr cosh(half arg) { return functions::cosh(arg); }
+inline expr cosh(expr arg) { return functions::cosh(arg); }
+
+/// Hyperbolic tangent.
+/// \param arg function argument
+/// \return hyperbolic tangent value of \a arg
+// template <typename T> typename enable<expr, T>::type tanh(T arg) {
+//   return functions::tanh(arg); }
+inline expr tanh(half arg) { return functions::tanh(arg); }
+inline expr tanh(expr arg) { return functions::tanh(arg); }
+
+/// Hyperbolic area sine.
+/// \param arg function argument
+/// \return area sine value of \a arg
+// template <typename T> typename enable<expr, T>::type asinh(T arg) {
+//   return functions::asinh(arg); }
+inline expr asinh(half arg) { return functions::asinh(arg); }
+inline expr asinh(expr arg) { return functions::asinh(arg); }
+
+/// Hyperbolic area cosine.
+/// \param arg function argument
+/// \return area cosine value of \a arg
+// template <typename T> typename enable<expr, T>::type acosh(T arg) {
+//   return functions::acosh(arg); }
+inline expr acosh(half arg) { return functions::acosh(arg); }
+inline expr acosh(expr arg) { return functions::acosh(arg); }
+
+/// Hyperbolic area tangent.
+/// \param arg function argument
+/// \return area tangent value of \a arg
+// template <typename T> typename enable<expr, T>::type atanh(T arg) {
+//   return functions::atanh(arg); }
+inline expr atanh(half arg) { return functions::atanh(arg); }
+inline expr atanh(expr arg) { return functions::atanh(arg); }
+
+/// \}
+/// \name Error and gamma functions
+/// \{
+
+/// Error function.
+/// \param arg function argument
+/// \return error function value of \a arg
+// template <typename T> typename enable<expr, T>::type erf(T arg) {
+//   return functions::erf(arg); }
+inline expr erf(half arg) { return functions::erf(arg); }
+inline expr erf(expr arg) { return functions::erf(arg); }
+
+/// Complementary error function.
+/// \param arg function argument
+/// \return 1 minus error function value of \a arg
+// template <typename T> typename enable<expr, T>::type erfc(T arg) {
+//   return functions::erfc(arg); }
+inline expr erfc(half arg) { return functions::erfc(arg); }
+inline expr erfc(expr arg) { return functions::erfc(arg); }
+
+/// Natural logarithm of gamma function.
+/// \param arg function argument
+/// \return natural logarithm of gamma function for \a arg
+// template <typename T> typename enable<expr, T>::type lgamma(T arg) {
+//   return functions::lgamma(arg); }
+inline expr lgamma(half arg) { return functions::lgamma(arg); }
+inline expr lgamma(expr arg) { return functions::lgamma(arg); }
+
+/// Gamma function.
+/// \param arg function argument
+/// \return gamma function value of \a arg
+// template <typename T> typename enable<expr, T>::type tgamma(T arg) {
+//   return functions::tgamma(arg); }
+inline expr tgamma(half arg) { return functions::tgamma(arg); }
+inline expr tgamma(expr arg) { return functions::tgamma(arg); }
+
+/// \}
+/// \name Rounding
+/// \{
+
+/// Nearest integer not less than half value.
+/// \param arg half to round
+/// \return nearest integer not less than \a arg
+// template <typename T> typename enable<half, T>::type ceil(T arg) {
+//   return functions::ceil(arg); }
+inline half ceil(half arg) { return functions::ceil(arg); }
+inline half ceil(expr arg) { return functions::ceil(arg); }
+
+/// Nearest integer not greater than half value.
+/// \param arg half to round
+/// \return nearest integer not greater than \a arg
+// template <typename T> typename enable<half, T>::type floor(T arg) {
+//   return functions::floor(arg); }
+inline half floor(half arg) { return functions::floor(arg); }
+inline half floor(expr arg) { return functions::floor(arg); }
+
+/// Nearest integer not greater in magnitude than half value.
+/// \param arg half to round
+/// \return nearest integer not greater in magnitude than \a arg
+// template <typename T> typename enable<half, T>::type trunc(T arg) {
+//   return functions::trunc(arg); }
+inline half trunc(half arg) { return functions::trunc(arg); }
+inline half trunc(expr arg) { return functions::trunc(arg); }
+
+/// Nearest integer.
+/// \param arg half to round
+/// \return nearest integer, rounded away from zero in half-way cases
+// template <typename T> typename enable<half, T>::type round(T arg) {
+//   return functions::round(arg); }
+inline half round(half arg) { return functions::round(arg); }
+inline half round(expr arg) { return functions::round(arg); }
+
+/// Nearest integer.
+/// \param arg half to round
+/// \return nearest integer, rounded away from zero in half-way cases
+// template <typename T> typename enable<long, T>::type lround(T arg) {
+//   return functions::lround(arg); }
+inline long lround(half arg) { return functions::lround(arg); }
+inline long lround(expr arg) { return functions::lround(arg); }
+
+/// Nearest integer using half's internal rounding mode.
+/// \param arg half expression to round
+/// \return nearest integer using default rounding mode
+// template <typename T> typename enable<half, T>::type nearbyint(T arg) {
+//   return functions::nearbyint(arg); }
+inline half nearbyint(half arg) { return functions::rint(arg); }
+inline half nearbyint(expr arg) { return functions::rint(arg); }
+
+/// Nearest integer using half's internal rounding mode.
+/// \param arg half expression to round
+/// \return nearest integer using default rounding mode
+// template <typename T> typename enable<half, T>::type rint(T arg) {
+//   return functions::rint(arg); }
+inline half rint(half arg) { return functions::rint(arg); }
+inline half rint(expr arg) { return functions::rint(arg); }
+
+/// Nearest integer using half's internal rounding mode.
+/// \param arg half expression to round
+/// \return nearest integer using default rounding mode
+// template <typename T> typename enable<long, T>::type lrint(T arg) {
+//   return functions::lrint(arg); }
+inline long lrint(half arg) { return functions::lrint(arg); }
+inline long lrint(expr arg) { return functions::lrint(arg); }
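+
+// Hedged example of the difference between the two rounding families above:
+// lround() always rounds halfway cases away from zero, while lrint() obeys
+// half's configured rounding mode (HALF_ROUND_STYLE):
+//
+//   // lround(half(2.5f)) == 3 always;
+//   // lrint(half(2.5f))  == 2 when configured for round-to-nearest-even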
+#if HALF_ENABLE_CPP11_LONG_LONG
+/// Nearest integer.
+/// \param arg half to round
+/// \return nearest integer, rounded away from zero in half-way cases
+// template <typename T> typename enable<long long, T>::type llround(T arg) {
+//   return functions::llround(arg); }
+inline long long llround(half arg) { return functions::llround(arg); }
+inline long long llround(expr arg) { return functions::llround(arg); }
+
+/// Nearest integer using half's internal rounding mode.
+/// \param arg half expression to round
+/// \return nearest integer using default rounding mode
+// template <typename T> typename enable<long long, T>::type llrint(T arg) {
+//   return functions::llrint(arg); }
+inline long long llrint(half arg) { return functions::llrint(arg); }
+inline long long llrint(expr arg) { return functions::llrint(arg); }
+#endif
+
+/// \}
+/// \name Floating point manipulation
+/// \{
+
+/// Decompress floating point number.
+/// \param arg number to decompress
+/// \param exp address to store exponent at
+/// \return significand in range [0.5, 1)
+// template <typename T> typename enable<half, T>::type frexp(T arg,
+//   int* exp) { return functions::frexp(arg, exp); }
+inline half frexp(half arg, int* exp) { return functions::frexp(arg, exp); }
+inline half frexp(expr arg, int* exp) { return functions::frexp(arg, exp); }
+
+/// Multiply by power of two.
+/// \param arg number to modify
+/// \param exp power of two to multiply with
+/// \return \a arg multiplied by 2 raised to \a exp
+// template <typename T> typename enable<half, T>::type ldexp(T arg,
+//   int exp) { return functions::scalbln(arg, exp); }
+inline half ldexp(half arg, int exp) { return functions::scalbln(arg, exp); }
+inline half ldexp(expr arg, int exp) { return functions::scalbln(arg, exp); }
+
+/// Extract integer and fractional parts.
+/// \param arg number to decompress
+/// \param iptr address to store integer part at
+/// \return fractional part
+// template <typename T> typename enable<half, T>::type modf(T arg,
+//   half* iptr) { return functions::modf(arg, iptr); }
+inline half modf(half arg, half* iptr) { return functions::modf(arg, iptr); }
+inline half modf(expr arg, half* iptr) { return functions::modf(arg, iptr); }
+
+/// Multiply by power of two.
+/// \param arg number to modify
+/// \param exp power of two to multiply with
+/// \return \a arg multiplied by 2 raised to \a exp
+// template <typename T> typename enable<half, T>::type scalbn(T arg,
+//   int exp) { return functions::scalbln(arg, exp); }
+inline half scalbn(half arg, int exp) { return functions::scalbln(arg, exp); }
+inline half scalbn(expr arg, int exp) { return functions::scalbln(arg, exp); }
+
+/// Multiply by power of two.
+/// \param arg number to modify
+/// \param exp power of two to multiply with
+/// \return \a arg multiplied by 2 raised to \a exp
+// template <typename T> typename enable<half, T>::type scalbln(T arg,
+//   long exp) { return functions::scalbln(arg, exp); }
+inline half scalbln(half arg, long exp) { return functions::scalbln(arg, exp); }
+inline half scalbln(expr arg, long exp) { return functions::scalbln(arg, exp); }
+
+/// Extract exponent.
+/// \param arg number to query
+/// \return floating point exponent
+/// \retval FP_ILOGB0 for zero
+/// \retval FP_ILOGBNAN for NaN
+/// \retval INT_MAX for infinity
+// template <typename T> typename enable<int, T>::type ilogb(T arg) {
+//   return functions::ilogb(arg); }
+inline int ilogb(half arg) { return functions::ilogb(arg); }
+inline int ilogb(expr arg) { return functions::ilogb(arg); }
+
+/// Extract exponent.
+/// \param arg number to query
+/// \return floating point exponent
+// template <typename T> typename enable<half, T>::type logb(T arg) {
+//   return functions::logb(arg); }
+inline half logb(half arg) { return functions::logb(arg); }
+inline half logb(expr arg) { return functions::logb(arg); }
+
+/// Next representable value.
+/// \param from value to compute next representable value for
+/// \param to direction towards which to compute next value
+/// \return next representable value after \a from in direction towards \a to
+// template <typename T, typename U> typename enable<half, T, U>::type
+// nextafter(T from, U to) { return functions::nextafter(from, to); }
+inline half nextafter(half from, half to) {
+  return functions::nextafter(from, to);
+}
+inline half nextafter(half from, expr to) {
+  return functions::nextafter(from, to);
+}
+inline half nextafter(expr from, half to) {
+  return functions::nextafter(from, to);
+}
+inline half nextafter(expr from, expr to) {
+  return functions::nextafter(from, to);
+}
+
+/// Next representable value.
+/// \param from value to compute next representable value for
+/// \param to direction towards which to compute next value
+/// \return next representable value after \a from in direction towards \a to
+// template <typename T> typename enable<half, T>::type nexttoward(T from,
+//   long double to) { return functions::nexttoward(from, to); }
+inline half nexttoward(half from, long double to) {
+  return functions::nexttoward(from, to);
+}
+inline half nexttoward(expr from, long double to) {
+  return functions::nexttoward(from, to);
+}
+
+/// Take sign.
+/// \param x value to change sign for
+/// \param y value to take sign from
+/// \return value equal to \a x in magnitude and to \a y in sign
+// template <typename T, typename U> typename enable<half, T, U>::type
+// copysign(T x, U y) { return functions::copysign(x, y); }
+inline half copysign(half x, half y) { return functions::copysign(x, y); }
+inline half copysign(half x, expr y) { return functions::copysign(x, y); }
+inline half copysign(expr x, half y) { return functions::copysign(x, y); }
+inline half copysign(expr x, expr y) { return functions::copysign(x, y); }
+
+/// \}
+/// \name Floating point classification
+/// \{
+
+/// Classify floating point value.
+/// \param arg number to classify
+/// \retval FP_ZERO for positive and negative zero
+/// \retval FP_SUBNORMAL for subnormal numbers
+/// \retval FP_INFINITE for positive and negative infinity
+/// \retval FP_NAN for NaNs
+/// \retval FP_NORMAL for all other (normal) values
+// template <typename T> typename enable<int, T>::type fpclassify(T arg) {
+//   return functions::fpclassify(arg); }
+inline int fpclassify(half arg) { return functions::fpclassify(arg); }
+inline int fpclassify(expr arg) { return functions::fpclassify(arg); }
+
+/// Check if finite number.
+/// \param arg number to check
+/// \retval true if neither infinity nor NaN
+/// \retval false else
+// template <typename T> typename enable<bool, T>::type isfinite(T arg) {
+//   return functions::isfinite(arg); }
+inline bool isfinite(half arg) { return functions::isfinite(arg); }
+inline bool isfinite(expr arg) { return functions::isfinite(arg); }
+
+/// Check for infinity.
+/// \param arg number to check
+/// \retval true for positive or negative infinity
+/// \retval false else
+// template <typename T> typename enable<bool, T>::type isinf(T arg) {
+//   return functions::isinf(arg); }
+inline bool isinf(half arg) { return functions::isinf(arg); }
+inline bool isinf(expr arg) { return functions::isinf(arg); }
+
+/// Check for NaN.
+/// \param arg number to check
+/// \retval true for NaNs
+/// \retval false else
+// template <typename T> typename enable<bool, T>::type isnan(T arg) {
+//   return functions::isnan(arg); }
+inline bool isnan(half arg) { return functions::isnan(arg); }
+inline bool isnan(expr arg) { return functions::isnan(arg); }
+
+/// Check if normal number.
+/// \param arg number to check
+/// \retval true if normal number
+/// \retval false if either subnormal, zero, infinity or NaN
+// template <typename T> typename enable<bool, T>::type isnormal(T arg) {
+//   return functions::isnormal(arg); }
+inline bool isnormal(half arg) { return functions::isnormal(arg); }
+inline bool isnormal(expr arg) { return functions::isnormal(arg); }
+
+/// Check sign.
+/// \param arg number to check
+/// \retval true for negative number
+/// \retval false for positive number
+// template <typename T> typename enable<bool, T>::type signbit(T arg) {
+//   return functions::signbit(arg); }
+inline bool signbit(half arg) { return functions::signbit(arg); }
+inline bool signbit(expr arg) { return functions::signbit(arg); }
+
+/// \}
+/// \name Comparison
+/// \{
+
+/// Comparison for greater than.
+/// \param x first operand
+/// \param y second operand
+/// \retval true if \a x greater than \a y
+/// \retval false else
+// template <typename T, typename U> typename enable<bool, T, U>::type
+// isgreater(T x, U y) { return functions::isgreater(x, y); }
+inline bool isgreater(half x, half y) { return functions::isgreater(x, y); }
+inline bool isgreater(half x, expr y) { return functions::isgreater(x, y); }
+inline bool isgreater(expr x, half y) { return functions::isgreater(x, y); }
+inline bool isgreater(expr x, expr y) { return functions::isgreater(x, y); }
+
+/// Comparison for greater equal.
+/// \param x first operand
+/// \param y second operand
+/// \retval true if \a x greater equal \a y
+/// \retval false else
+// template <typename T, typename U> typename enable<bool, T, U>::type
+// isgreaterequal(T x, U y) { return functions::isgreaterequal(x, y); }
+inline bool isgreaterequal(half x, half y) {
+  return functions::isgreaterequal(x, y);
+}
+inline bool isgreaterequal(half x, expr y) {
+  return functions::isgreaterequal(x, y);
+}
+inline bool isgreaterequal(expr x, half y) {
+  return functions::isgreaterequal(x, y);
+}
+inline bool isgreaterequal(expr x, expr y) {
+  return functions::isgreaterequal(x, y);
+}
+
+/// Comparison for less than.
+/// \param x first operand
+/// \param y second operand
+/// \retval true if \a x less than \a y
+/// \retval false else
+// template <typename T, typename U> typename enable<bool, T, U>::type
+// isless(T x, U y) { return functions::isless(x, y); }
+inline bool isless(half x, half y) { return functions::isless(x, y); }
+inline bool isless(half x, expr y) { return functions::isless(x, y); }
+inline bool isless(expr x, half y) { return functions::isless(x, y); }
+inline bool isless(expr x, expr y) { return functions::isless(x, y); }
+
+/// Comparison for less equal.
+/// \param x first operand
+/// \param y second operand
+/// \retval true if \a x less equal \a y
+/// \retval false else
+// template <typename T, typename U> typename enable<bool, T, U>::type
+// islessequal(T x, U y) { return functions::islessequal(x, y); }
+inline bool islessequal(half x, half y) { return functions::islessequal(x, y); }
+inline bool islessequal(half x, expr y) { return functions::islessequal(x, y); }
+inline bool islessequal(expr x, half y) { return functions::islessequal(x, y); }
+inline bool islessequal(expr x, expr y) { return functions::islessequal(x, y); }
+
+/// Comparison for less or greater.
+/// \param x first operand
+/// \param y second operand
+/// \retval true if either less or greater
+/// \retval false else
+// template <typename T, typename U> typename enable<bool, T, U>::type
+// islessgreater(T x, U y) { return functions::islessgreater(x, y); }
+inline bool islessgreater(half x, half y) {
+  return functions::islessgreater(x, y);
+}
+inline bool islessgreater(half x, expr y) {
+  return functions::islessgreater(x, y);
+}
+inline bool islessgreater(expr x, half y) {
+  return functions::islessgreater(x, y);
+}
+inline bool islessgreater(expr x, expr y) {
+  return functions::islessgreater(x, y);
+}
+
+/// Check if unordered.
+/// \param x first operand
+/// \param y second operand
+/// \retval true if unordered (one or two NaN operands)
+/// \retval false else
+// template <typename T, typename U> typename enable<bool, T, U>::type
+// isunordered(T x, U y) { return functions::isunordered(x, y); }
+inline bool isunordered(half x, half y) { return functions::isunordered(x, y); }
+inline bool isunordered(half x, expr y) { return functions::isunordered(x, y); }
+inline bool isunordered(expr x, half y) { return functions::isunordered(x, y); }
+inline bool isunordered(expr x, expr y) { return functions::isunordered(x, y); }
+
+/// \}
+/// \name Casting
+/// \{
+
+/// Cast to or from half-precision floating point number.
+/// This casts between [half](\ref half_float::half) and any built-in
+/// arithmetic type. The values are converted directly, without any roundtrip
+/// over `float` that a `static_cast` would otherwise do. This overload uses
+/// the default rounding mode.
+///
+/// Using this cast with neither of the two types being a [half](\ref
+/// half_float::half) or with any of the two types not being a built-in
+/// arithmetic type (apart from [half](\ref half_float::half), of course)
+/// results in a compiler error and casting between [half](\ref
+/// half_float::half)s is just a no-op.
+/// \tparam T destination type (half or built-in arithmetic type)
+/// \tparam U source type (half or built-in arithmetic type)
+/// \param arg value to cast
+/// \return \a arg converted to destination type
+template <typename T, typename U> T half_cast(U arg) {
+  return half_caster<T, U>::cast(arg);
+}
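+
+// A hedged usage sketch of the two overloads (the rounding-mode overload is
+// defined just below); the mode is a template argument, so selecting it has
+// no run-time cost:
+//
+//   half h = half_cast<half>(3.7);                        // default mode
+//   int i = half_cast<int, std::round_toward_zero>(h);    // i == 3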
+
+/// Cast to or from half-precision floating point number.
+/// This casts between [half](\ref half_float::half) and any built-in
+/// arithmetic type. The values are converted directly using the given
+/// rounding mode, without any roundtrip over `float` that a `static_cast`
+/// would otherwise do.
+///
+/// Using this cast with neither of the two types being a [half](\ref
+/// half_float::half) or with any of the two types not being a built-in
+/// arithmetic type (apart from [half](\ref half_float::half), of course)
+/// results in a compiler error and casting between [half](\ref
+/// half_float::half)s is just a no-op.
+/// \tparam T destination type (half or built-in arithmetic type)
+/// \tparam R rounding mode to use
+/// \tparam U source type (half or built-in arithmetic type)
+/// \param arg value to cast
+/// \return \a arg converted to destination type
+template <typename T, std::float_round_style R, typename U>
+T half_cast(U arg) {
+  return half_caster<T, U, R>::cast(arg);
+}
+/// \}
+} // namespace detail
+
+using detail::operator==;
+using detail::operator!=;
+using detail::operator<;
+using detail::operator>;
+using detail::operator<=;
+using detail::operator>=;
+using detail::operator+;
+using detail::operator-;
+using detail::operator*;
+using detail::operator/;
+using detail::operator<<;
+using detail::operator>>;
+
+using detail::abs;
+using detail::acos;
+using detail::acosh;
+using detail::asin;
+using detail::asinh;
+using detail::atan;
+using detail::atan2;
+using detail::atanh;
+using detail::cbrt;
+using detail::ceil;
+using detail::cos;
+using detail::cosh;
+using detail::erf;
+using detail::erfc;
+using detail::exp;
+using detail::exp2;
+using detail::expm1;
+using detail::fabs;
+using detail::fdim;
+using detail::floor;
+using detail::fma;
+using detail::fmax;
+using detail::fmin;
+using detail::fmod;
+using detail::hypot;
+using detail::lgamma;
+using detail::log;
+using detail::log10;
+using detail::log1p;
+using detail::log2;
+using detail::lrint;
+using detail::lround;
+using detail::nanh;
+using detail::nearbyint;
+using detail::pow;
+using detail::remainder;
+using detail::remquo;
+using detail::rint;
+using detail::round;
+using detail::sin;
+using detail::sinh;
+using detail::sqrt;
+using detail::tan;
+using detail::tanh;
+using detail::tgamma;
+using detail::trunc;
+#if HALF_ENABLE_CPP11_LONG_LONG
+using detail::llrint;
+using detail::llround;
+#endif
+using detail::copysign;
+using detail::fpclassify;
+using detail::frexp;
+using detail::ilogb;
+using detail::isfinite;
+using detail::isgreater;
+using detail::isgreaterequal;
+using detail::isinf;
+using detail::isless;
+using detail::islessequal;
+using detail::islessgreater;
+using detail::isnan;
+using detail::isnormal;
+using detail::isunordered;
+using detail::ldexp;
+using detail::logb;
+using detail::modf;
+using detail::nextafter;
+using detail::nexttoward;
+using detail::scalbln;
+using detail::scalbn;
+using detail::signbit;
+
+using detail::half_cast;
+} // namespace half_float
+
+/// Extensions to the C++ standard library.
+namespace std {
+/// Numeric limits for half-precision floats.
+/// Because of the underlying single-precision implementation of many
+/// operations, it inherits some properties from
+/// `std::numeric_limits<float>`.
+template <>
+class numeric_limits<half_float::half> : public numeric_limits<float> {
+ public:
+  /// Supports signed values.
+  static HALF_CONSTEXPR_CONST bool is_signed = true;
+
+  /// Is not exact.
+  static HALF_CONSTEXPR_CONST bool is_exact = false;
+
+  /// Doesn't provide modulo arithmetic.
+  static HALF_CONSTEXPR_CONST bool is_modulo = false;
+
+  /// IEEE conformant.
+  static HALF_CONSTEXPR_CONST bool is_iec559 = true;
+
+  /// Supports infinity.
+  static HALF_CONSTEXPR_CONST bool has_infinity = true;
+
+  /// Supports quiet NaNs.
+  static HALF_CONSTEXPR_CONST bool has_quiet_NaN = true;
+
+  /// Supports subnormal values.
+  static HALF_CONSTEXPR_CONST float_denorm_style has_denorm = denorm_present;
+
+  /// Rounding mode.
+/// Extensions to the C++ standard library.
+namespace std {
+/// Numeric limits for half-precision floats.
+/// Because of the underlying single-precision implementation of many
+/// operations, it inherits some properties from
+/// `std::numeric_limits<float>`.
+template <>
+class numeric_limits<half_float::half> : public numeric_limits<float> {
+ public:
+  /// Supports signed values.
+  static HALF_CONSTEXPR_CONST bool is_signed = true;
+
+  /// Is not exact.
+  static HALF_CONSTEXPR_CONST bool is_exact = false;
+
+  /// Doesn't provide modulo arithmetic.
+  static HALF_CONSTEXPR_CONST bool is_modulo = false;
+
+  /// IEEE conformant.
+  static HALF_CONSTEXPR_CONST bool is_iec559 = true;
+
+  /// Supports infinity.
+  static HALF_CONSTEXPR_CONST bool has_infinity = true;
+
+  /// Supports quiet NaNs.
+  static HALF_CONSTEXPR_CONST bool has_quiet_NaN = true;
+
+  /// Supports subnormal values.
+  static HALF_CONSTEXPR_CONST float_denorm_style has_denorm = denorm_present;
+
+  /// Rounding mode.
+  /// Due to the mix of internal single-precision computations (using the
+  /// rounding mode of the underlying
+  /// single-precision implementation) with the rounding mode of the
+  /// single-to-half conversions, the actual rounding
+  /// mode might be `std::round_indeterminate` if the default half-precision
+  /// rounding mode doesn't match the
+  /// single-precision rounding mode.
+  static HALF_CONSTEXPR_CONST float_round_style round_style =
+      (std::numeric_limits<float>::round_style == half_float::half::round_style)
+          ? half_float::half::round_style
+          : round_indeterminate;
+
+  /// Significant digits.
+  static HALF_CONSTEXPR_CONST int digits = 11;
+
+  /// Significant decimal digits.
+  static HALF_CONSTEXPR_CONST int digits10 = 3;
+
+  /// Required decimal digits to represent all possible values.
+  static HALF_CONSTEXPR_CONST int max_digits10 = 5;
+
+  /// Number base.
+  static HALF_CONSTEXPR_CONST int radix = 2;
+
+  /// One more than smallest exponent.
+  static HALF_CONSTEXPR_CONST int min_exponent = -13;
+
+  /// Smallest normalized representable power of 10.
+  static HALF_CONSTEXPR_CONST int min_exponent10 = -4;
+
+  /// One more than largest exponent.
+  static HALF_CONSTEXPR_CONST int max_exponent = 16;
+
+  /// Largest finitely representable power of 10.
+  static HALF_CONSTEXPR_CONST int max_exponent10 = 4;
+
+  /// Smallest positive normal value.
+  static HALF_CONSTEXPR half_float::half min() HALF_NOTHROW {
+    return half_float::half(half_float::detail::binary, 0x0400);
+  }
+
+  /// Smallest finite value.
+  static HALF_CONSTEXPR half_float::half lowest() HALF_NOTHROW {
+    return half_float::half(half_float::detail::binary, 0xFBFF);
+  }
+
+  /// Largest finite value.
+  static HALF_CONSTEXPR half_float::half max() HALF_NOTHROW {
+    return half_float::half(half_float::detail::binary, 0x7BFF);
+  }
+
+  /// Difference between one and next representable value.
+  static HALF_CONSTEXPR half_float::half epsilon() HALF_NOTHROW {
+    return half_float::half(half_float::detail::binary, 0x1400);
+  }
+
+  /// Maximum rounding error.
+  static HALF_CONSTEXPR half_float::half round_error() HALF_NOTHROW {
+    return half_float::half(half_float::detail::binary,
+                            (round_style == std::round_to_nearest) ? 0x3800
+                                                                   : 0x3C00);
+  }
+
+  /// Positive infinity.
+  static HALF_CONSTEXPR half_float::half infinity() HALF_NOTHROW {
+    return half_float::half(half_float::detail::binary, 0x7C00);
+  }
+
+  /// Quiet NaN.
+  static HALF_CONSTEXPR half_float::half quiet_NaN() HALF_NOTHROW {
+    return half_float::half(half_float::detail::binary, 0x7FFF);
+  }
+
+  /// Signalling NaN.
+  static HALF_CONSTEXPR half_float::half signaling_NaN() HALF_NOTHROW {
+    return half_float::half(half_float::detail::binary, 0x7DFF);
+  }
+
+  /// Smallest positive subnormal value.
+  static HALF_CONSTEXPR half_float::half denorm_min() HALF_NOTHROW {
+    return half_float::half(half_float::detail::binary, 0x0001);
+  }
+};
+
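+// Illustrative queries against the specialization above (values follow from
+// the bit patterns shown):
+//
+//   std::numeric_limits<half_float::half>::epsilon();  // 0x1400 = 2^-10
+//   std::numeric_limits<half_float::half>::max();      // 0x7BFF = 65504
+//   std::numeric_limits<half_float::half>::digits;     // 11 significant bits
+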
+#if HALF_ENABLE_CPP11_HASH
+/// Hash function for half-precision floats.
+/// This is only defined if C++11 `std::hash` is supported and enabled.
+template <>
+struct hash<half_float::half> //: unary_function<half_float::half, size_t>
+{
+  /// Type of function argument.
+  typedef half_float::half argument_type;
+
+  /// Function return type.
+  typedef size_t result_type;
+
+  /// Compute hash function.
+  /// \param arg half to hash
+  /// \return hash value
+  result_type operator()(argument_type arg) const {
+    return hash<half_float::detail::uint16>()(
+        static_cast<unsigned>(arg.data_) & -(arg.data_ != 0x8000));
+  }
+};
+#endif
+} // namespace std
+
+#undef HALF_CONSTEXPR
+#undef HALF_CONSTEXPR_CONST
+#undef HALF_NOEXCEPT
+#undef HALF_NOTHROW
+#ifdef HALF_POP_WARNINGS
+#pragma warning(pop)
+#undef HALF_POP_WARNINGS
+#endif
+
+#endif
diff --git a/fastdeploy/backends/tensorrt/common/logger.cpp b/fastdeploy/backends/tensorrt/common/logger.cpp
new file mode 100644
index 0000000000..1e1671558a
--- /dev/null
+++ b/fastdeploy/backends/tensorrt/common/logger.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "logger.h"
+#include "ErrorRecorder.h"
+#include "logging.h"
+
+SampleErrorRecorder gRecorder;
+namespace sample {
+Logger gLogger{Logger::Severity::kINFO};
+LogStreamConsumer gLogVerbose{LOG_VERBOSE(gLogger)};
+LogStreamConsumer gLogInfo{LOG_INFO(gLogger)};
+LogStreamConsumer gLogWarning{LOG_WARN(gLogger)};
+LogStreamConsumer gLogError{LOG_ERROR(gLogger)};
+LogStreamConsumer gLogFatal{LOG_FATAL(gLogger)};
+
+void setReportableSeverity(Logger::Severity severity) {
+  gLogger.setReportableSeverity(severity);
+  gLogVerbose.setReportableSeverity(severity);
+  gLogInfo.setReportableSeverity(severity);
+  gLogWarning.setReportableSeverity(severity);
+  gLogError.setReportableSeverity(severity);
+  gLogFatal.setReportableSeverity(severity);
+}
+} // namespace sample
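+
+// Typical usage of the globals above (illustrative sketch):
+//
+//   sample::setReportableSeverity(sample::Severity::kVERBOSE);
+//   sample::gLogInfo << "building engine" << std::endl;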
diff --git a/fastdeploy/backends/tensorrt/common/logger.h b/fastdeploy/backends/tensorrt/common/logger.h
new file mode 100644
index 0000000000..ab642744e2
--- /dev/null
+++ b/fastdeploy/backends/tensorrt/common/logger.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LOGGER_H
+#define LOGGER_H
+
+#include "logging.h"
+
+class SampleErrorRecorder;
+extern SampleErrorRecorder gRecorder;
+namespace sample {
+extern Logger gLogger;
+extern LogStreamConsumer gLogVerbose;
+extern LogStreamConsumer gLogInfo;
+extern LogStreamConsumer gLogWarning;
+extern LogStreamConsumer gLogError;
+extern LogStreamConsumer gLogFatal;
+
+void setReportableSeverity(Logger::Severity severity);
+} // namespace sample
+
+#endif // LOGGER_H
diff --git a/fastdeploy/backends/tensorrt/common/logging.h b/fastdeploy/backends/tensorrt/common/logging.h
new file mode 100644
index 0000000000..abcb6b4066
--- /dev/null
+++ b/fastdeploy/backends/tensorrt/common/logging.h
@@ -0,0 +1,573 @@
+/*
+ * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TENSORRT_LOGGING_H
+#define TENSORRT_LOGGING_H
+
+#include "NvInferRuntimeCommon.h"
+#include "sampleOptions.h"
+#include <cassert>
+#include <ctime>
+#include <iomanip>
+#include <iostream>
+#include <mutex>
+#include <ostream>
+#include <sstream>
+#include <string>
+
+namespace sample {
+
+using Severity = nvinfer1::ILogger::Severity;
+
+class LogStreamConsumerBuffer : public std::stringbuf {
+ public:
+  LogStreamConsumerBuffer(std::ostream& stream, const std::string& prefix,
+                          bool shouldLog)
+      : mOutput(stream), mPrefix(prefix), mShouldLog(shouldLog) {}
+
+  LogStreamConsumerBuffer(LogStreamConsumerBuffer&& other) noexcept
+      : mOutput(other.mOutput), mPrefix(other.mPrefix),
+        mShouldLog(other.mShouldLog) {}
+  LogStreamConsumerBuffer(const LogStreamConsumerBuffer& other) = delete;
+  LogStreamConsumerBuffer() = delete;
+  LogStreamConsumerBuffer& operator=(const LogStreamConsumerBuffer&) = delete;
+  LogStreamConsumerBuffer& operator=(LogStreamConsumerBuffer&&) = delete;
+
+  ~LogStreamConsumerBuffer() override {
+    // std::streambuf::pbase() gives a pointer to the beginning of the buffered
+    // part of the output sequence
+    // std::streambuf::pptr() gives a pointer to the current position of the
+    // output sequence
+    // if the pointer to the beginning is not equal to the pointer to the
+    // current position,
+    // call putOutput() to log the output to the stream
+    if (pbase() != pptr()) {
+      putOutput();
+    }
+  }
+
+  //!
+  //! synchronizes the stream buffer and returns 0 on success
+  //! synchronizing the stream buffer consists of inserting the buffer contents
+  //! into the stream,
+  //! resetting the buffer and flushing the stream
+  //!
+  int32_t sync() override {
+    putOutput();
+    return 0;
+  }
+
+  void putOutput() {
+    if (mShouldLog) {
+      // prepend timestamp
+      std::time_t timestamp = std::time(nullptr);
+      tm* tm_local = std::localtime(&timestamp);
+      mOutput << "[";
+      mOutput << std::setw(2) << std::setfill('0') << 1 + tm_local->tm_mon
+              << "/";
+      mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_mday << "/";
+      mOutput << std::setw(4) << std::setfill('0') << 1900 + tm_local->tm_year
+              << "-";
+      mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_hour << ":";
+      mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_min << ":";
+      mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_sec << "] ";
+      // std::stringbuf::str() gets the string contents of the buffer
+      // insert the buffer contents prefixed by the appropriate severity tag
+      // into the stream
+      mOutput << mPrefix << str();
+    }
+    // set the buffer to empty
+    str("");
+    // flush the stream
+    mOutput.flush();
+  }
+
+  void setShouldLog(bool shouldLog) { mShouldLog = shouldLog; }
+
+ private:
+  std::ostream& mOutput;
+  std::string mPrefix;
+  bool mShouldLog{};
+}; // class LogStreamConsumerBuffer
+
+//!
+//! \class LogStreamConsumerBase
+//! \brief Convenience object used to initialize LogStreamConsumerBuffer before
+//! std::ostream in LogStreamConsumer
+//!
+class LogStreamConsumerBase {
+ public:
+  LogStreamConsumerBase(std::ostream& stream, const std::string& prefix,
+                        bool shouldLog)
+      : mBuffer(stream, prefix, shouldLog) {}
+
+ protected:
+  std::mutex mLogMutex;
+  LogStreamConsumerBuffer mBuffer;
+}; // class LogStreamConsumerBase
+
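+// Illustrative output format produced by LogStreamConsumerBuffer::putOutput()
+// above, combined with a severity prefix such as "[I] " and the "[TRT] " tag
+// that Logger::log() adds further below (example line only):
+//
+//   [08/15/2022-14:03:27] [I] [TRT] building engine
+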
+//!
+//! \class LogStreamConsumer
+//! \brief Convenience object used to facilitate use of C++ stream syntax when
+//! logging messages.
+//! Order of base classes is LogStreamConsumerBase and then std::ostream.
+//! This is because the LogStreamConsumerBase class is used to initialize the
+//! LogStreamConsumerBuffer member field
+//! in LogStreamConsumer and then the address of the buffer is passed to
+//! std::ostream.
+//! This is necessary to prevent the address of an uninitialized buffer from
+//! being passed to std::ostream.
+//! Please do not change the order of the parent classes.
+//!
+class LogStreamConsumer : protected LogStreamConsumerBase, public std::ostream {
+ public:
+  //!
+  //! \brief Creates a LogStreamConsumer which logs messages with level
+  //! severity.
+  //! Reportable severity determines if the messages are severe enough to be
+  //! logged.
+  //!
+  LogStreamConsumer(nvinfer1::ILogger::Severity reportableSeverity,
+                    nvinfer1::ILogger::Severity severity)
+      : LogStreamConsumerBase(severityOstream(severity),
+                              severityPrefix(severity),
+                              severity <= reportableSeverity),
+        std::ostream(&mBuffer), // links the stream buffer with the stream
+        mShouldLog(severity <= reportableSeverity), mSeverity(severity) {}
+
+  LogStreamConsumer(LogStreamConsumer&& other) noexcept
+      : LogStreamConsumerBase(severityOstream(other.mSeverity),
+                              severityPrefix(other.mSeverity),
+                              other.mShouldLog),
+        std::ostream(&mBuffer), // links the stream buffer with the stream
+        mShouldLog(other.mShouldLog), mSeverity(other.mSeverity) {}
+  LogStreamConsumer(const LogStreamConsumer& other) = delete;
+  LogStreamConsumer() = delete;
+  ~LogStreamConsumer() = default;
+  LogStreamConsumer& operator=(const LogStreamConsumer&) = delete;
+  LogStreamConsumer& operator=(LogStreamConsumer&&) = delete;
+
+  void setReportableSeverity(Severity reportableSeverity) {
+    mShouldLog = mSeverity <= reportableSeverity;
+    mBuffer.setShouldLog(mShouldLog);
+  }
+
+  std::mutex& getMutex() { return mLogMutex; }
+
+  bool getShouldLog() const { return mShouldLog; }
+
+ private:
+  static std::ostream& severityOstream(Severity severity) {
+    return severity >= Severity::kINFO ? std::cout : std::cerr;
+  }
+
+  static std::string severityPrefix(Severity severity) {
+    switch (severity) {
+    case Severity::kINTERNAL_ERROR:
+      return "[F] ";
+    case Severity::kERROR:
+      return "[E] ";
+    case Severity::kWARNING:
+      return "[W] ";
+    case Severity::kINFO:
+      return "[I] ";
+    case Severity::kVERBOSE:
+      return "[V] ";
+    default:
+      assert(0);
+      return "";
+    }
+  }
+
+  bool mShouldLog;
+  Severity mSeverity;
+}; // class LogStreamConsumer
+
+template <typename T>
+LogStreamConsumer& operator<<(LogStreamConsumer& logger, const T& obj) {
+  if (logger.getShouldLog()) {
+    std::lock_guard<std::mutex> guard(logger.getMutex());
+    auto& os = static_cast<std::ostream&>(logger);
+    os << obj;
+  }
+  return logger;
+}
+
+//!
+//! Special handling std::endl
+//!
+inline LogStreamConsumer& operator<<(LogStreamConsumer& logger,
+                                     std::ostream& (*f)(std::ostream&)) {
+  if (logger.getShouldLog()) {
+    std::lock_guard<std::mutex> guard(logger.getMutex());
+    auto& os = static_cast<std::ostream&>(logger);
+    os << f;
+  }
+  return logger;
+}
+
+inline LogStreamConsumer& operator<<(LogStreamConsumer& logger,
+                                     const nvinfer1::Dims& dims) {
+  if (logger.getShouldLog()) {
+    std::lock_guard<std::mutex> guard(logger.getMutex());
+    auto& os = static_cast<std::ostream&>(logger);
+    for (int32_t i = 0; i < dims.nbDims; ++i) {
+      os << (i ? "x" : "") << dims.d[i];
+    }
+  }
+  return logger;
+}
+
+//!
+//! \class Logger
+//!
+//! \brief Class which manages logging of TensorRT tools and samples
+//!
+//! \details This class provides a common interface for TensorRT tools and
+//! samples to log information to the console,
+//! and supports logging two types of messages:
+//!
+//! - Debugging messages with an associated severity (info, warning, error, or
+//! internal error/fatal)
+//! - Test pass/fail messages
+//!
+//! The advantage of having all samples use this class for logging as opposed to
+//! emitting directly to stdout/stderr is
+//! that the logic for controlling the verbosity and formatting of sample output
+//! is centralized in one location.
+//!
+//! In the future, this class could be extended to support dumping test results
+//! to a file in some standard format
+//! (for example, JUnit XML), and providing additional metadata (e.g. timing the
+//! duration of a test run).
+//!
+//!
TODO: For backwards compatibility with existing samples, this class inherits +//! directly from the nvinfer1::ILogger +//! interface, which is problematic since there isn't a clean separation between +//! messages coming from the TensorRT +//! library and messages coming from the sample. +//! +//! In the future (once all samples are updated to use Logger::getTRTLogger() to +//! access the ILogger) we can refactor the +//! class to eliminate the inheritance and instead make the nvinfer1::ILogger +//! implementation a member of the Logger +//! object. +//! +class Logger : public nvinfer1::ILogger { + public: + explicit Logger(Severity severity = Severity::kWARNING) + : mReportableSeverity(severity) {} + + //! + //! \enum TestResult + //! \brief Represents the state of a given test + //! + enum class TestResult { + kRUNNING, //!< The test is running + kPASSED, //!< The test passed + kFAILED, //!< The test failed + kWAIVED //!< The test was waived + }; + + //! + //! \brief Forward-compatible method for retrieving the nvinfer::ILogger + //! associated with this Logger + //! \return The nvinfer1::ILogger associated with this Logger + //! + //! TODO Once all samples are updated to use this method to register the + //! logger with TensorRT, + //! we can eliminate the inheritance of Logger from ILogger + //! + nvinfer1::ILogger& getTRTLogger() noexcept { return *this; } + + //! + //! \brief Implementation of the nvinfer1::ILogger::log() virtual method + //! + //! Note samples should not be calling this function directly; it will + //! eventually go away once we eliminate the + //! inheritance from nvinfer1::ILogger + //! + void log(Severity severity, const char* msg) noexcept override { + LogStreamConsumer(mReportableSeverity, severity) + << "[TRT] " << std::string(msg) << std::endl; + } + + //! + //! \brief Method for controlling the verbosity of logging output + //! + //! \param severity The logger will only emit messages that have severity of + //! this level or higher. + //! + void setReportableSeverity(Severity severity) noexcept { + mReportableSeverity = severity; + } + + //! + //! \brief Opaque handle that holds logging information for a particular test + //! + //! This object is an opaque handle to information used by the Logger to print + //! test results. + //! The sample must call Logger::defineTest() in order to obtain a TestAtom + //! that can be used + //! with Logger::reportTest{Start,End}(). + //! + class TestAtom { + public: + TestAtom(TestAtom&&) = default; + + private: + friend class Logger; + + TestAtom(bool started, const std::string& name, const std::string& cmdline) + : mStarted(started), mName(name), mCmdline(cmdline) {} + + bool mStarted; + std::string mName; + std::string mCmdline; + }; + + //! + //! \brief Define a test for logging + //! + //! \param[in] name The name of the test. This should be a string starting + //! with + //! "TensorRT" and containing dot-separated strings + //! containing + //! the characters [A-Za-z0-9_]. + //! For example, "TensorRT.sample_googlenet" + //! \param[in] cmdline The command line used to reproduce the test + // + //! \return a TestAtom that can be used in Logger::reportTest{Start,End}(). + //! + static TestAtom defineTest(const std::string& name, + const std::string& cmdline) { + return TestAtom(false, name, cmdline); + } + + //! + //! \brief A convenience overloaded version of defineTest() that accepts an + //! array of command-line arguments + //! as input + //! + //! \param[in] name The name of the test + //! 
\param[in] argc The number of command-line arguments + //! \param[in] argv The array of command-line arguments (given as C strings) + //! + //! \return a TestAtom that can be used in Logger::reportTest{Start,End}(). + //! + static TestAtom defineTest(const std::string& name, int32_t argc, + char const* const* argv) { + // Append TensorRT version as info + const std::string vname = + name + " [TensorRT v" + std::to_string(NV_TENSORRT_VERSION) + "]"; + auto cmdline = genCmdlineString(argc, argv); + return defineTest(vname, cmdline); + } + + //! + //! \brief Report that a test has started. + //! + //! \pre reportTestStart() has not been called yet for the given testAtom + //! + //! \param[in] testAtom The handle to the test that has started + //! + static void reportTestStart(TestAtom& testAtom) { + reportTestResult(testAtom, TestResult::kRUNNING); + assert(!testAtom.mStarted); + testAtom.mStarted = true; + } + + //! + //! \brief Report that a test has ended. + //! + //! \pre reportTestStart() has been called for the given testAtom + //! + //! \param[in] testAtom The handle to the test that has ended + //! \param[in] result The result of the test. Should be one of + //! TestResult::kPASSED, + //! TestResult::kFAILED, TestResult::kWAIVED + //! + static void reportTestEnd(TestAtom const& testAtom, TestResult result) { + assert(result != TestResult::kRUNNING); + assert(testAtom.mStarted); + reportTestResult(testAtom, result); + } + + static int32_t reportPass(TestAtom const& testAtom) { + reportTestEnd(testAtom, TestResult::kPASSED); + return EXIT_SUCCESS; + } + + static int32_t reportFail(TestAtom const& testAtom) { + reportTestEnd(testAtom, TestResult::kFAILED); + return EXIT_FAILURE; + } + + static int32_t reportWaive(TestAtom const& testAtom) { + reportTestEnd(testAtom, TestResult::kWAIVED); + return EXIT_SUCCESS; + } + + static int32_t reportTest(TestAtom const& testAtom, bool pass) { + return pass ? reportPass(testAtom) : reportFail(testAtom); + } + + Severity getReportableSeverity() const { return mReportableSeverity; } + + private: + //! + //! \brief returns an appropriate string for prefixing a log message with the + //! given severity + //! + static const char* severityPrefix(Severity severity) { + switch (severity) { + case Severity::kINTERNAL_ERROR: + return "[F] "; + case Severity::kERROR: + return "[E] "; + case Severity::kWARNING: + return "[W] "; + case Severity::kINFO: + return "[I] "; + case Severity::kVERBOSE: + return "[V] "; + default: + assert(0); + return ""; + } + } + + //! + //! \brief returns an appropriate string for prefixing a test result message + //! with the given result + //! + static const char* testResultString(TestResult result) { + switch (result) { + case TestResult::kRUNNING: + return "RUNNING"; + case TestResult::kPASSED: + return "PASSED"; + case TestResult::kFAILED: + return "FAILED"; + case TestResult::kWAIVED: + return "WAIVED"; + default: + assert(0); + return ""; + } + } + + //! + //! \brief returns an appropriate output stream (cout or cerr) to use with the + //! given severity + //! + static std::ostream& severityOstream(Severity severity) { + return severity >= Severity::kINFO ? std::cout : std::cerr; + } + + //! + //! \brief method that implements logging test results + //! + static void reportTestResult(TestAtom const& testAtom, TestResult result) { + severityOstream(Severity::kINFO) + << "&&&& " << testResultString(result) << " " << testAtom.mName << " # " + << testAtom.mCmdline << std::endl; + } + + //! + //! 
\brief generate a command line string from the given (argc, argv) values + //! + static std::string genCmdlineString(int32_t argc, char const* const* argv) { + std::stringstream ss; + for (int32_t i = 0; i < argc; i++) { + if (i > 0) { + ss << " "; + } + ss << argv[i]; + } + return ss.str(); + } + + Severity mReportableSeverity; +}; // class Logger + +namespace { +//! +//! \brief produces a LogStreamConsumer object that can be used to log messages +//! of severity kVERBOSE +//! +//! Example usage: +//! +//! LOG_VERBOSE(logger) << "hello world" << std::endl; +//! +inline LogStreamConsumer LOG_VERBOSE(const Logger& logger) { + return LogStreamConsumer(logger.getReportableSeverity(), Severity::kVERBOSE); +} + +//! +//! \brief produces a LogStreamConsumer object that can be used to log messages +//! of severity kINFO +//! +//! Example usage: +//! +//! LOG_INFO(logger) << "hello world" << std::endl; +//! +inline LogStreamConsumer LOG_INFO(const Logger& logger) { + return LogStreamConsumer(logger.getReportableSeverity(), Severity::kINFO); +} + +//! +//! \brief produces a LogStreamConsumer object that can be used to log messages +//! of severity kWARNING +//! +//! Example usage: +//! +//! LOG_WARN(logger) << "hello world" << std::endl; +//! +inline LogStreamConsumer LOG_WARN(const Logger& logger) { + return LogStreamConsumer(logger.getReportableSeverity(), Severity::kWARNING); +} + +//! +//! \brief produces a LogStreamConsumer object that can be used to log messages +//! of severity kERROR +//! +//! Example usage: +//! +//! LOG_ERROR(logger) << "hello world" << std::endl; +//! +inline LogStreamConsumer LOG_ERROR(const Logger& logger) { + return LogStreamConsumer(logger.getReportableSeverity(), Severity::kERROR); +} + +//! +//! \brief produces a LogStreamConsumer object that can be used to log messages +//! of severity kINTERNAL_ERROR +//! ("fatal" severity) +//! +//! Example usage: +//! +//! LOG_FATAL(logger) << "hello world" << std::endl; +//! +inline LogStreamConsumer LOG_FATAL(const Logger& logger) { + return LogStreamConsumer(logger.getReportableSeverity(), + Severity::kINTERNAL_ERROR); +} +} // anonymous namespace +} // namespace sample +#endif // TENSORRT_LOGGING_H diff --git a/fastdeploy/backends/tensorrt/common/parserOnnxConfig.h b/fastdeploy/backends/tensorrt/common/parserOnnxConfig.h new file mode 100644 index 0000000000..8569ca01c6 --- /dev/null +++ b/fastdeploy/backends/tensorrt/common/parserOnnxConfig.h @@ -0,0 +1,126 @@ +/* + * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#ifndef PARSER_ONNX_CONFIG_H
+#define PARSER_ONNX_CONFIG_H
+
+#include <cstdlib>
+#include <iostream>
+#include <string>
+
+#include "NvInfer.h"
+#include "NvOnnxConfig.h"
+#include "NvOnnxParser.h"
+
+#define ONNX_DEBUG 1
+
+/**
+ * \class ParserOnnxConfig
+ * \brief Configuration Manager Class Concrete Implementation
+ *
+ * \note:
+ *
+ */
+
+using namespace std;
+
+class ParserOnnxConfig : public nvonnxparser::IOnnxConfig {
+ protected:
+  string mModelFilename{};
+  string mTextFilename{};
+  string mFullTextFilename{};
+  nvinfer1::DataType mModelDtype;
+  nvonnxparser::IOnnxConfig::Verbosity mVerbosity;
+  bool mPrintLayercInfo;
+
+ public:
+  ParserOnnxConfig()
+      : mModelDtype(nvinfer1::DataType::kFLOAT),
+        mVerbosity(static_cast<int>(nvinfer1::ILogger::Severity::kWARNING)),
+        mPrintLayercInfo(false) {
+#ifdef ONNX_DEBUG
+    if (isDebug()) {
+      std::cout << " ParserOnnxConfig::ctor(): " << this << "\t" << std::endl;
+    }
+#endif
+  }
+
+ protected:
+  ~ParserOnnxConfig() {
+#ifdef ONNX_DEBUG
+    if (isDebug()) {
+      std::cout << "ParserOnnxConfig::dtor(): " << this << std::endl;
+    }
+#endif
+  }
+
+ public:
+  virtual void setModelDtype(const nvinfer1::DataType modelDtype) noexcept {
+    mModelDtype = modelDtype;
+  }
+
+  virtual nvinfer1::DataType getModelDtype() const noexcept {
+    return mModelDtype;
+  }
+
+  virtual const char* getModelFileName() const noexcept {
+    return mModelFilename.c_str();
+  }
+  virtual void setModelFileName(const char* onnxFilename) noexcept {
+    mModelFilename = string(onnxFilename);
+  }
+  virtual nvonnxparser::IOnnxConfig::Verbosity
+  getVerbosityLevel() const noexcept {
+    return mVerbosity;
+  }
+  virtual void addVerbosity() noexcept { ++mVerbosity; }
+  virtual void reduceVerbosity() noexcept { --mVerbosity; }
+  virtual void
+  setVerbosityLevel(nvonnxparser::IOnnxConfig::Verbosity verbosity) noexcept {
+    mVerbosity = verbosity;
+  }
+
+  virtual const char* getTextFileName() const noexcept {
+    return mTextFilename.c_str();
+  }
+  virtual void setTextFileName(const char* textFilename) noexcept {
+    mTextFilename = string(textFilename);
+  }
+  virtual const char* getFullTextFileName() const noexcept {
+    return mFullTextFilename.c_str();
+  }
+  virtual void setFullTextFileName(const char* fullTextFilename) noexcept {
+    mFullTextFilename = string(fullTextFilename);
+  }
+  virtual bool getPrintLayerInfo() const noexcept { return mPrintLayercInfo; }
+  virtual void setPrintLayerInfo(bool src) noexcept {
+    mPrintLayercInfo = src;
+  } //!< set the boolean variable corresponding to the Layer Info, see
+    //! getPrintLayerInfo()
+
+  virtual bool isDebug() const noexcept {
+#if ONNX_DEBUG
+    return (std::getenv("ONNX_DEBUG") ? true : false);
+#else
+    return false;
+#endif
+  }
+
+  virtual void destroy() noexcept { delete this; }
+
+}; // class ParserOnnxConfig
+
+#endif
diff --git a/fastdeploy/backends/tensorrt/common/safeCommon.h b/fastdeploy/backends/tensorrt/common/safeCommon.h
new file mode 100644
index 0000000000..1aa92ad223
--- /dev/null
+++ b/fastdeploy/backends/tensorrt/common/safeCommon.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TENSORRT_SAFE_COMMON_H
+#define TENSORRT_SAFE_COMMON_H
+
+#include "NvInferRuntimeCommon.h"
+#include <cstdint>
+#include <cstdlib>
+#include <iostream>
+#include <memory>
+#include <stdexcept>
+
+#define CHECK(status)                                                          \
+  do {                                                                         \
+    auto ret = (status);                                                       \
+    if (ret != 0) {                                                            \
+      std::cerr << "Cuda failure: " << ret << std::endl;                       \
+      abort();                                                                 \
+    }                                                                          \
+  } while (0)
+
+namespace samplesCommon {
+template <typename T> inline std::shared_ptr<T> infer_object(T* obj) {
+  if (!obj) {
+    throw std::runtime_error("Failed to create object");
+  }
+  return std::shared_ptr<T>(obj);
+}
+
+inline uint32_t elementSize(nvinfer1::DataType t) {
+  switch (t) {
+  case nvinfer1::DataType::kINT32:
+  case nvinfer1::DataType::kFLOAT:
+    return 4;
+  case nvinfer1::DataType::kHALF:
+    return 2;
+  case nvinfer1::DataType::kINT8:
+    return 1;
+  case nvinfer1::DataType::kBOOL:
+    return 1;
+  }
+  return 0;
+}
+
+template <typename A, typename B> inline A divUp(A x, B n) {
+  return (x + n - 1) / n;
+}
+
+} // namespace samplesCommon
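+
+// Worked example for the helpers above (illustrative): divUp(10, 4) evaluates
+// to (10 + 4 - 1) / 4 == 3, the number of 4-wide blocks needed to cover 10
+// elements, and elementSize(nvinfer1::DataType::kHALF) == 2 bytes, so a
+// 10-element half tensor padded to 4-element blocks occupies
+// divUp(10, 4) * 4 * 2 == 24 bytes.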
+
+#endif // TENSORRT_SAFE_COMMON_H
diff --git a/fastdeploy/backends/tensorrt/common/sampleConfig.h b/fastdeploy/backends/tensorrt/common/sampleConfig.h
new file mode 100644
index 0000000000..a097f4dbee
--- /dev/null
+++ b/fastdeploy/backends/tensorrt/common/sampleConfig.h
@@ -0,0 +1,251 @@
+/*
+ * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SampleConfig_H
+#define SampleConfig_H
+
+#include <cstring>
+#include <iostream>
+#include <string>
+
+#include "NvInfer.h"
+#include "NvOnnxConfig.h"
+class SampleConfig : public nvonnxparser::IOnnxConfig {
+ public:
+  enum class InputDataFormat : int { kASCII = 0, kPPM = 1 };
+
+ private:
+  std::string mModelFilename;
+  std::string mEngineFilename;
+  std::string mTextFilename;
+  std::string mFullTextFilename;
+  std::string mImageFilename;
+  std::string mReferenceFilename;
+  std::string mOutputFilename;
+  std::string mCalibrationFilename;
+  std::string mTimingCacheFilename;
+  int64_t mLabel{-1};
+  int64_t mMaxBatchSize{32};
+  int64_t mCalibBatchSize{0};
+  int64_t mMaxNCalibBatch{0};
+  int64_t mFirstCalibBatch{0};
+  int64_t mUseDLACore{-1};
+  nvinfer1::DataType mModelDtype{nvinfer1::DataType::kFLOAT};
+  bool mTF32{true};
+  Verbosity mVerbosity{static_cast<int>(nvinfer1::ILogger::Severity::kWARNING)};
+  bool mPrintLayercInfo{false};
+  bool mDebugBuilder{false};
+  InputDataFormat mInputDataFormat{InputDataFormat::kASCII};
+  uint64_t mTopK{0};
+  float mFailurePercentage{-1.0f};
+  float mTolerance{0.0f};
+  float mAbsTolerance{1e-5f};
+
+ public:
+  SampleConfig() {
+#ifdef ONNX_DEBUG
+    if (isDebug()) {
+      std::cout << " SampleConfig::ctor(): " << this << "\t" << std::endl;
+    }
+#endif
+  }
+
+ protected:
+  ~SampleConfig() {
+#ifdef ONNX_DEBUG
+    if (isDebug()) {
+      std::cout << "SampleConfig::dtor(): " << this << std::endl;
+    }
+#endif
+  }
+
+ public:
+  void setModelDtype(const nvinfer1::DataType mdt) noexcept {
+    mModelDtype = mdt;
+  }
+
+  nvinfer1::DataType getModelDtype() const noexcept { return mModelDtype; }
+
+  bool getTF32() const noexcept { return mTF32; }
+
+  void setTF32(bool enabled) noexcept { mTF32 = enabled; }
+
+  const char* getModelFileName() const noexcept {
+    return mModelFilename.c_str();
+  }
+
+  void setModelFileName(const char* onnxFilename) noexcept {
+    mModelFilename = std::string(onnxFilename);
+  }
+  Verbosity getVerbosityLevel() const noexcept { return mVerbosity; }
+  void addVerbosity() noexcept { ++mVerbosity; }
+  void reduceVerbosity() noexcept { --mVerbosity; }
+  virtual void setVerbosityLevel(Verbosity v) noexcept { mVerbosity = v; }
+  const char* getEngineFileName() const noexcept {
+    return mEngineFilename.c_str();
+  }
+  void setEngineFileName(const char* engineFilename) noexcept {
+    mEngineFilename = std::string(engineFilename);
+  }
+  const char* getTextFileName() const noexcept { return mTextFilename.c_str(); }
+  void setTextFileName(const char* textFilename) noexcept {
+    mTextFilename = std::string(textFilename);
+  }
+  const char* getFullTextFileName() const noexcept {
+    return mFullTextFilename.c_str();
+  }
+  void setFullTextFileName(const char* fullTextFilename) noexcept {
+    mFullTextFilename = std::string(fullTextFilename);
+  }
+  void setLabel(int64_t label) noexcept { mLabel = label; } //!< set the Label
+
+  int64_t getLabel() const noexcept { return mLabel; } //!< get the Label
+
+  bool getPrintLayerInfo() const noexcept { return mPrintLayercInfo; }
+
+  void setPrintLayerInfo(bool b) noexcept {
+    mPrintLayercInfo = b;
+  } //!< set the boolean variable corresponding to the Layer Info, see
+    //!
getPrintLayerInfo() + + void setMaxBatchSize(int64_t maxBatchSize) noexcept { + mMaxBatchSize = maxBatchSize; + } //!< set the Max Batch Size + int64_t getMaxBatchSize() const noexcept { + return mMaxBatchSize; + } //!< get the Max Batch Size + + void setCalibBatchSize(int64_t CalibBatchSize) noexcept { + mCalibBatchSize = CalibBatchSize; + } //!< set the calibration batch size + int64_t getCalibBatchSize() const noexcept { + return mCalibBatchSize; + } //!< get calibration batch size + + void setMaxNCalibBatch(int64_t MaxNCalibBatch) noexcept { + mMaxNCalibBatch = MaxNCalibBatch; + } //!< set Max Number of Calibration Batches + int64_t getMaxNCalibBatch() const noexcept { + return mMaxNCalibBatch; + } //!< get the Max Number of Calibration Batches + + void setFirstCalibBatch(int64_t FirstCalibBatch) noexcept { + mFirstCalibBatch = FirstCalibBatch; + } //!< set the first calibration batch + int64_t getFirstCalibBatch() const noexcept { + return mFirstCalibBatch; + } //!< get the first calibration batch + + void setUseDLACore(int64_t UseDLACore) noexcept { + mUseDLACore = UseDLACore; + } //!< set the DLA core to use + int64_t getUseDLACore() const noexcept { + return mUseDLACore; + } //!< get the DLA core to use + + void setDebugBuilder() noexcept { + mDebugBuilder = true; + } //!< enable the Debug info, while building the engine. + bool getDebugBuilder() const noexcept { + return mDebugBuilder; + } //!< get the boolean variable, corresponding to the debug builder + + const char* + getImageFileName() const noexcept //!< set Image file name (PPM or ASCII) + { + return mImageFilename.c_str(); + } + void setImageFileName( + const char* imageFilename) noexcept //!< get the Image file name + { + mImageFilename = std::string(imageFilename); + } + const char* getReferenceFileName() const noexcept { + return mReferenceFilename.c_str(); + } + void setReferenceFileName( + const char* referenceFilename) noexcept //!< set reference file name + { + mReferenceFilename = std::string(referenceFilename); + } + + void setInputDataFormat(InputDataFormat idt) noexcept { + mInputDataFormat = idt; + } //!< specifies expected data format of the image file (PPM or ASCII) + InputDataFormat getInputDataFormat() const noexcept { + return mInputDataFormat; + } //!< returns the expected data format of the image file. + + const char* getOutputFileName() + const noexcept //!< specifies the file to save the results + { + return mOutputFilename.c_str(); + } + void setOutputFileName( + const char* outputFilename) noexcept //!< get the output file name + { + mOutputFilename = std::string(outputFilename); + } + + const char* getCalibrationFileName() const noexcept { + return mCalibrationFilename.c_str(); + } //!< specifies the file containing the list of image files for int8 + //! calibration + void setCalibrationFileName( + const char* calibrationFilename) noexcept //!< get the int 8 calibration + //! list file name + { + mCalibrationFilename = std::string(calibrationFilename); + } + + uint64_t getTopK() const noexcept { return mTopK; } + void setTopK(uint64_t topK) noexcept { + mTopK = topK; + } //!< If this options is specified, return the K top probabilities. 
+
+  float getFailurePercentage() const noexcept { return mFailurePercentage; }
+
+  void setFailurePercentage(float f) noexcept { mFailurePercentage = f; }
+
+  float getAbsoluteTolerance() const noexcept { return mAbsTolerance; }
+
+  void setAbsoluteTolerance(float a) noexcept { mAbsTolerance = a; }
+
+  float getTolerance() const noexcept { return mTolerance; }
+
+  void setTolerance(float t) noexcept { mTolerance = t; }
+
+  const char* getTimingCacheFilename() const noexcept {
+    return mTimingCacheFilename.c_str();
+  }
+
+  void setTimingCacheFileName(const char* timingCacheFilename) noexcept {
+    mTimingCacheFilename = std::string(timingCacheFilename);
+  }
+
+  bool isDebug() const noexcept {
+#if ONNX_DEBUG
+    return (std::getenv("ONNX_DEBUG") ? true : false);
+#else
+    return false;
+#endif
+  }
+
+  void destroy() noexcept { delete this; }
+
+}; // class SampleConfig
+
+#endif
diff --git a/fastdeploy/backends/tensorrt/common/sampleDevice.h b/fastdeploy/backends/tensorrt/common/sampleDevice.h
new file mode 100644
index 0000000000..cdbb080196
--- /dev/null
+++ b/fastdeploy/backends/tensorrt/common/sampleDevice.h
@@ -0,0 +1,397 @@
+/*
+ * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TRT_SAMPLE_DEVICE_H
+#define TRT_SAMPLE_DEVICE_H
+
+#include <cassert>
+#include <chrono>
+#include <cuda_runtime.h>
+#include <iostream>
+#include <thread>
+
+namespace sample {
+
+inline void cudaCheck(cudaError_t ret, std::ostream& err = std::cerr) {
+  if (ret != cudaSuccess) {
+    err << "Cuda failure: " << cudaGetErrorString(ret) << std::endl;
+    abort();
+  }
+}
+
+class TrtCudaEvent;
+
+namespace {
+
+void cudaSleep(void* sleep) {
+  std::this_thread::sleep_for(
+      std::chrono::duration<float, std::milli>(*static_cast<float*>(sleep)));
+}
+
+} // namespace
+
+//!
+//! \class TrtCudaStream
+//! \brief Managed CUDA stream
+//!
+class TrtCudaStream {
+ public:
+  TrtCudaStream() { cudaCheck(cudaStreamCreate(&mStream)); }
+
+  TrtCudaStream(const TrtCudaStream&) = delete;
+
+  TrtCudaStream& operator=(const TrtCudaStream&) = delete;
+
+  TrtCudaStream(TrtCudaStream&&) = delete;
+
+  TrtCudaStream& operator=(TrtCudaStream&&) = delete;
+
+  ~TrtCudaStream() { cudaCheck(cudaStreamDestroy(mStream)); }
+
+  cudaStream_t get() const { return mStream; }
+
+  void synchronize() { cudaCheck(cudaStreamSynchronize(mStream)); }
+
+  void wait(TrtCudaEvent& event);
+
+  void sleep(float* ms) {
+    cudaCheck(cudaLaunchHostFunc(mStream, cudaSleep, ms));
+  }
+
+ private:
+  cudaStream_t mStream{};
+};
+
+//!
+//! \class TrtCudaEvent
+//! \brief Managed CUDA event
+//!
+class TrtCudaEvent {
+ public:
+  explicit TrtCudaEvent(bool blocking = true) {
+    const uint32_t flags = blocking ?
cudaEventBlockingSync : cudaEventDefault; + cudaCheck(cudaEventCreateWithFlags(&mEvent, flags)); + } + + TrtCudaEvent(const TrtCudaEvent&) = delete; + + TrtCudaEvent& operator=(const TrtCudaEvent&) = delete; + + TrtCudaEvent(TrtCudaEvent&&) = delete; + + TrtCudaEvent& operator=(TrtCudaEvent&&) = delete; + + ~TrtCudaEvent() { cudaCheck(cudaEventDestroy(mEvent)); } + + cudaEvent_t get() const { return mEvent; } + + void record(const TrtCudaStream& stream) { + cudaCheck(cudaEventRecord(mEvent, stream.get())); + } + + void synchronize() { cudaCheck(cudaEventSynchronize(mEvent)); } + + // Returns time elapsed time in milliseconds + float operator-(const TrtCudaEvent& e) const { + float time{0}; + cudaCheck(cudaEventElapsedTime(&time, e.get(), get())); + return time; + } + + private: + cudaEvent_t mEvent{}; +}; + +inline void TrtCudaStream::wait(TrtCudaEvent& event) { + cudaCheck(cudaStreamWaitEvent(mStream, event.get(), 0)); +} + +//! +//! \class TrtCudaGraph +//! \brief Managed CUDA graph +//! +class TrtCudaGraph { + public: + explicit TrtCudaGraph() = default; + + TrtCudaGraph(const TrtCudaGraph&) = delete; + + TrtCudaGraph& operator=(const TrtCudaGraph&) = delete; + + TrtCudaGraph(TrtCudaGraph&&) = delete; + + TrtCudaGraph& operator=(TrtCudaGraph&&) = delete; + + ~TrtCudaGraph() { + if (mGraphExec) { + cudaGraphExecDestroy(mGraphExec); + } + } + + void beginCapture(TrtCudaStream& stream) { + cudaCheck( + cudaStreamBeginCapture(stream.get(), cudaStreamCaptureModeThreadLocal)); + } + + bool launch(TrtCudaStream& stream) { + return cudaGraphLaunch(mGraphExec, stream.get()) == cudaSuccess; + } + + void endCapture(TrtCudaStream& stream) { + cudaCheck(cudaStreamEndCapture(stream.get(), &mGraph)); + cudaCheck(cudaGraphInstantiate(&mGraphExec, mGraph, nullptr, nullptr, 0)); + cudaCheck(cudaGraphDestroy(mGraph)); + } + + void endCaptureOnError(TrtCudaStream& stream) { + // There are two possibilities why stream capture would fail: + // (1) stream is in cudaErrorStreamCaptureInvalidated state. + // (2) TRT reports a failure. + // In case (1), the returning mGraph should be nullptr. + // In case (2), the returning mGraph is not nullptr, but it should not be + // used. + const auto ret = cudaStreamEndCapture(stream.get(), &mGraph); + if (ret == cudaErrorStreamCaptureInvalidated) { + assert(mGraph == nullptr); + } else { + assert(ret == cudaSuccess); + assert(mGraph != nullptr); + cudaCheck(cudaGraphDestroy(mGraph)); + mGraph = nullptr; + } + // Clean up any CUDA error. + cudaGetLastError(); + sample::gLogWarning << "The CUDA graph capture on the stream has failed." + << std::endl; + } + + private: + cudaGraph_t mGraph{}; + cudaGraphExec_t mGraphExec{}; +}; + +//! +//! \class TrtCudaBuffer +//! \brief Managed buffer for host and device +//! 
+template <typename A, typename D> class TrtCudaBuffer {
+ public:
+  TrtCudaBuffer() = default;
+
+  TrtCudaBuffer(const TrtCudaBuffer&) = delete;
+
+  TrtCudaBuffer& operator=(const TrtCudaBuffer&) = delete;
+
+  TrtCudaBuffer(TrtCudaBuffer&& rhs) {
+    reset(rhs.mPtr);
+    rhs.mPtr = nullptr;
+  }
+
+  TrtCudaBuffer& operator=(TrtCudaBuffer&& rhs) {
+    if (this != &rhs) {
+      reset(rhs.mPtr);
+      rhs.mPtr = nullptr;
+    }
+    return *this;
+  }
+
+  ~TrtCudaBuffer() { reset(); }
+
+  TrtCudaBuffer(size_t size) { A()(&mPtr, size); }
+
+  void allocate(size_t size) {
+    reset();
+    A()(&mPtr, size);
+  }
+
+  void reset(void* ptr = nullptr) {
+    if (mPtr) {
+      D()(mPtr);
+    }
+    mPtr = ptr;
+  }
+
+  void* get() const { return mPtr; }
+
+ private:
+  void* mPtr{nullptr};
+};
+
+struct DeviceAllocator {
+  void operator()(void** ptr, size_t size) { cudaCheck(cudaMalloc(ptr, size)); }
+};
+
+struct DeviceDeallocator {
+  void operator()(void* ptr) { cudaCheck(cudaFree(ptr)); }
+};
+
+struct ManagedAllocator {
+  void operator()(void** ptr, size_t size) {
+    cudaCheck(cudaMallocManaged(ptr, size));
+  }
+};
+
+struct HostAllocator {
+  void operator()(void** ptr, size_t size) {
+    cudaCheck(cudaMallocHost(ptr, size));
+  }
+};
+
+struct HostDeallocator {
+  void operator()(void* ptr) { cudaCheck(cudaFreeHost(ptr)); }
+};
+
+using TrtDeviceBuffer = TrtCudaBuffer<DeviceAllocator, DeviceDeallocator>;
+using TrtManagedBuffer = TrtCudaBuffer<ManagedAllocator, DeviceDeallocator>;
+
+using TrtHostBuffer = TrtCudaBuffer<HostAllocator, HostDeallocator>;
+
+//!
+//! \class IMirroredBuffer
+//! \brief Coupled host and device buffers
+//!
+class IMirroredBuffer {
+ public:
+  //!
+  //! Allocate memory for the mirrored buffer given the size
+  //! of the allocation.
+  //!
+  virtual void allocate(size_t size) = 0;
+
+  //!
+  //! Get the pointer to the device side buffer.
+  //!
+  //! \return pointer to device memory or nullptr if uninitialized.
+  //!
+  virtual void* getDeviceBuffer() const = 0;
+
+  //!
+  //! Get the pointer to the host side buffer.
+  //!
+  //! \return pointer to host memory or nullptr if uninitialized.
+  //!
+  virtual void* getHostBuffer() const = 0;
+
+  //!
+  //! Copy the memory from host to device.
+  //!
+  virtual void hostToDevice(TrtCudaStream& stream) = 0;
+
+  //!
+  //! Copy the memory from device to host.
+  //!
+  virtual void deviceToHost(TrtCudaStream& stream) = 0;
+
+  //!
+  //! Interface to get the size of the memory
+  //!
+  //! \return the size of memory allocated.
+  //!
+  virtual size_t getSize() const = 0;
+
+  //!
+  //! Virtual destructor declaration
+  //!
+  virtual ~IMirroredBuffer() = default;
+
+}; // class IMirroredBuffer
+
+//!
+//! Class to have a separate memory buffer for discrete device and host
+//! allocations.
+//!
+class DiscreteMirroredBuffer : public IMirroredBuffer {
+ public:
+  void allocate(size_t size) {
+    mSize = size;
+    mHostBuffer.allocate(size);
+    mDeviceBuffer.allocate(size);
+  }
+
+  void* getDeviceBuffer() const { return mDeviceBuffer.get(); }
+
+  void* getHostBuffer() const { return mHostBuffer.get(); }
+
+  void hostToDevice(TrtCudaStream& stream) {
+    cudaCheck(cudaMemcpyAsync(mDeviceBuffer.get(), mHostBuffer.get(), mSize,
+                              cudaMemcpyHostToDevice, stream.get()));
+  }
+
+  void deviceToHost(TrtCudaStream& stream) {
+    cudaCheck(cudaMemcpyAsync(mHostBuffer.get(), mDeviceBuffer.get(), mSize,
+                              cudaMemcpyDeviceToHost, stream.get()));
+  }
+
+  size_t getSize() const { return mSize; }
+
+ private:
+  size_t mSize{0};
+  TrtHostBuffer mHostBuffer;
+  TrtDeviceBuffer mDeviceBuffer;
+}; // class DiscreteMirroredBuffer
+
+//!
+//! Class to have a unified memory buffer for embedded devices.
+//!
+class UnifiedMirroredBuffer : public IMirroredBuffer { + public: + void allocate(size_t size) { + mSize = size; + mBuffer.allocate(size); + } + + void* getDeviceBuffer() const { return mBuffer.get(); } + + void* getHostBuffer() const { return mBuffer.get(); } + + void hostToDevice(TrtCudaStream& stream) { + // Does nothing since we are using unified memory. + } + + void deviceToHost(TrtCudaStream& stream) { + // Does nothing since we are using unified memory. + } + + size_t getSize() const { return mSize; } + + private: + size_t mSize{0}; + TrtManagedBuffer mBuffer; +}; // class UnifiedMirroredBuffer + +inline void setCudaDevice(int device, std::ostream& os) { + cudaCheck(cudaSetDevice(device)); + + cudaDeviceProp properties; + cudaCheck(cudaGetDeviceProperties(&properties, device)); + + // clang-format off + os << "=== Device Information ===" << std::endl; + os << "Selected Device: " << properties.name << std::endl; + os << "Compute Capability: " << properties.major << "." << properties.minor << std::endl; + os << "SMs: " << properties.multiProcessorCount << std::endl; + os << "Compute Clock Rate: " << properties.clockRate / 1000000.0F << " GHz" << std::endl; + os << "Device Global Memory: " << (properties.totalGlobalMem >> 20) << " MiB" << std::endl; + os << "Shared Memory per SM: " << (properties.sharedMemPerMultiprocessor >> 10) << " KiB" << std::endl; + os << "Memory Bus Width: " << properties.memoryBusWidth << " bits" + << " (ECC " << (properties.ECCEnabled != 0 ? "enabled" : "disabled") << ")" << std::endl; + os << "Memory Clock Rate: " << properties.memoryClockRate / 1000000.0F << " GHz" << std::endl; + // clang-format on +} + +} // namespace sample + +#endif // TRT_SAMPLE_DEVICE_H diff --git a/fastdeploy/backends/tensorrt/common/sampleEngines.cpp b/fastdeploy/backends/tensorrt/common/sampleEngines.cpp new file mode 100644 index 0000000000..32d0fa5650 --- /dev/null +++ b/fastdeploy/backends/tensorrt/common/sampleEngines.cpp @@ -0,0 +1,1708 @@ +/* + * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include <algorithm>
+#include <fstream>
+#include <iostream>
+#include <iterator>
+#include <map>
+#include <random>
+#include <string>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "NvCaffeParser.h"
+#include "NvInfer.h"
+#include "NvOnnxParser.h"
+
+#include "ErrorRecorder.h"
+#include "common.h"
+#include "half.h"
+#include "logger.h"
+#include "sampleEngines.h"
+#include "sampleOptions.h"
+#include "sampleUtils.h"
+
+#if !defined(_WIN32)
+#include <dlfcn.h>
+#endif
+
+using namespace nvinfer1;
+
+namespace sample {
+
+namespace {
+
+struct CaffeBufferShutter {
+  ~CaffeBufferShutter() { nvcaffeparser1::shutdownProtobufLibrary(); }
+};
+
+std::map<std::string, float>
+readScalesFromCalibrationCache(const std::string& calibrationFile) {
+  std::map<std::string, float> tensorScales;
+  std::ifstream cache{calibrationFile};
+  if (!cache.is_open()) {
+    sample::gLogError << "[TRT] Can not open provided calibration cache file"
+                      << std::endl;
+    return tensorScales;
+  }
+  std::string line;
+  while (std::getline(cache, line)) {
+    auto colonPos = line.find_last_of(':');
+    if (colonPos != std::string::npos) {
+      // Scales should be stored in calibration cache as 32-bit floating numbers
+      // encoded as 32-bit integers
+      int32_t scalesAsInt =
+          std::stoi(line.substr(colonPos + 2, 8), nullptr, 16);
+      const auto tensorName = line.substr(0, colonPos);
+      tensorScales[tensorName] = *reinterpret_cast<float*>(&scalesAsInt);
+    }
+  }
+  cache.close();
+  return tensorScales;
+}
+} // namespace
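+
+// Sketch of the cache layout parsed above: after a version header line, each
+// entry is expected to look like "<tensor name>: <8 hex digits>", where the
+// hex digits hold the IEEE-754 bit pattern of the per-tensor float scale
+// (hypothetical example, names made up):
+//
+//   TRT-8XXX-EntropyCalibration2
+//   input_0: 3c010204
+//
+// setTensorScalesFromCalibration below turns each scale s into the dynamic
+// range [-127 * s, 127 * s] for INT8 I/O tensors.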
+
+void setTensorScalesFromCalibration(nvinfer1::INetworkDefinition& network,
+                                    const std::vector<IOFormat>& inputFormats,
+                                    const std::vector<IOFormat>& outputFormats,
+                                    const std::string& calibrationFile) {
+  const auto tensorScales = readScalesFromCalibrationCache(calibrationFile);
+  const bool broadcastInputFormats =
+      broadcastIOFormats(inputFormats, network.getNbInputs());
+  for (int32_t i = 0, n = network.getNbInputs(); i < n; ++i) {
+    int32_t formatIdx = broadcastInputFormats ? 0 : i;
+    if (!inputFormats.empty() &&
+        inputFormats[formatIdx].first == DataType::kINT8) {
+      auto* input = network.getInput(i);
+      const auto calibScale = tensorScales.at(input->getName());
+      input->setDynamicRange(-127 * calibScale, 127 * calibScale);
+    }
+  }
+  const bool broadcastOutputFormats =
+      broadcastIOFormats(outputFormats, network.getNbOutputs());
+  for (int32_t i = 0, n = network.getNbOutputs(); i < n; ++i) {
+    int32_t formatIdx = broadcastOutputFormats ? 0 : i;
+    if (!outputFormats.empty() &&
+        outputFormats[formatIdx].first == DataType::kINT8) {
+      auto* output = network.getOutput(i);
+      const auto calibScale = tensorScales.at(output->getName());
+      output->setDynamicRange(-127 * calibScale, 127 * calibScale);
+    }
+  }
+}
+
+#define SMP_RETVAL_IF_FALSE(condition, msg, retval, err)                       \
+  {                                                                            \
+    if ((condition) == false) {                                                \
+      (err) << (msg) << std::endl;                                             \
+      return retval;                                                           \
+    }                                                                          \
+  }
+
+Parser modelToNetwork(const ModelOptions& model,
+                      nvinfer1::INetworkDefinition& network,
+                      std::ostream& err) {
+  sample::gLogInfo << "Start parsing network model" << std::endl;
+  Parser parser;
+  const std::string& modelName = model.baseModel.model;
+  switch (model.baseModel.format) {
+  case ModelFormat::kCAFFE: {
+    using namespace nvcaffeparser1;
+    parser.caffeParser.reset(createCaffeParser());
+    CaffeBufferShutter bufferShutter;
+    const auto* const blobNameToTensor = parser.caffeParser->parse(
+        model.prototxt.c_str(), modelName.empty() ? nullptr : modelName.c_str(),
+        network, DataType::kFLOAT);
+    if (!blobNameToTensor) {
+      err << "Failed to parse caffe model or prototxt, tensors blob not found"
+          << std::endl;
+      parser.caffeParser.reset();
+      break;
+    }
+
+    for (const auto& s : model.outputs) {
+      if (blobNameToTensor->find(s.c_str()) == nullptr) {
+        err << "Could not find output blob " << s << std::endl;
+        parser.caffeParser.reset();
+        break;
+      }
+      network.markOutput(*blobNameToTensor->find(s.c_str()));
+    }
+    break;
+  }
+  case ModelFormat::kONNX: {
+    using namespace nvonnxparser;
+    parser.onnxParser.reset(
+        createParser(network, sample::gLogger.getTRTLogger()));
+    if (!parser.onnxParser->parseFromFile(
+            model.baseModel.model.c_str(),
+            static_cast<int>(sample::gLogger.getReportableSeverity()))) {
+      err << "Failed to parse onnx file" << std::endl;
+      parser.onnxParser.reset();
+    }
+    break;
+  }
+  case ModelFormat::kANY:
+    break;
+  }
+
+  sample::gLogInfo << "Finish parsing network model" << std::endl;
+  return parser;
+}
+
+namespace {
+
+class RndInt8Calibrator : public nvinfer1::IInt8EntropyCalibrator2 {
+ public:
+  RndInt8Calibrator(int batches, std::vector<int64_t>& elemCount,
+                    const std::string& cacheFile,
+                    const nvinfer1::INetworkDefinition& network,
+                    std::ostream& err);
+
+  ~RndInt8Calibrator() {
+    for (auto& elem : mInputDeviceBuffers) {
+      cudaCheck(cudaFree(elem.second), mErr);
+    }
+  }
+
+  bool getBatch(void* bindings[], const char* names[],
+                int nbBindings) noexcept override;
+
+  int getBatchSize() const noexcept override { return 1; }
+
+  const void* readCalibrationCache(size_t& length) noexcept override;
+
+  virtual void writeCalibrationCache(const void*, size_t) noexcept override {}
+
+ private:
+  int mBatches{};
+  int mCurrentBatch{};
+  std::string mCacheFile;
+  std::map<std::string, void*> mInputDeviceBuffers;
+  std::vector<char> mCalibrationCache;
+  std::ostream& mErr;
+};
+
+RndInt8Calibrator::RndInt8Calibrator(int batches,
+                                     std::vector<int64_t>& elemCount,
+                                     const std::string& cacheFile,
+                                     const INetworkDefinition& network,
+                                     std::ostream& err)
+    : mBatches(batches), mCurrentBatch(0), mCacheFile(cacheFile), mErr(err) {
+  std::ifstream tryCache(cacheFile, std::ios::binary);
+  if (tryCache.good()) {
+    return;
+  }
+
+  std::default_random_engine generator;
+  std::uniform_real_distribution<float> distribution(-1.0F, 1.0F);
+  auto gen = [&generator, &distribution]() { return distribution(generator); };
+
+  for (int i = 0; i < network.getNbInputs(); i++) {
+    auto* input = network.getInput(i);
+    std::vector<float> rnd_data(elemCount[i]);
+    std::generate_n(rnd_data.begin(), elemCount[i], gen);
+
+    void* data;
+    cudaCheck(cudaMalloc(&data, elemCount[i] * sizeof(float)), mErr);
+    cudaCheck(cudaMemcpy(data, rnd_data.data(), elemCount[i] * sizeof(float),
+                         cudaMemcpyHostToDevice),
+              mErr);
+
+    mInputDeviceBuffers.insert(std::make_pair(input->getName(), data));
+  }
+}
+
+bool RndInt8Calibrator::getBatch(void* bindings[], const char* names[],
+                                 int nbBindings) noexcept {
+  if (mCurrentBatch >= mBatches) {
+    return false;
+  }
+
+  for (int i = 0; i < nbBindings; ++i) {
+    bindings[i] = mInputDeviceBuffers[names[i]];
+  }
+
+  ++mCurrentBatch;
+
+  return true;
+}
+
+const void* RndInt8Calibrator::readCalibrationCache(size_t& length) noexcept {
+  mCalibrationCache.clear();
+  std::ifstream input(mCacheFile, std::ios::binary);
+  input >> std::noskipws;
+  if (input.good()) {
+    std::copy(std::istream_iterator<char>(input),
+              std::istream_iterator<char>(),
+              std::back_inserter(mCalibrationCache));
+  }
+
+  length = mCalibrationCache.size();
+  return !mCalibrationCache.empty() ? mCalibrationCache.data() : nullptr;
+}
+
+bool setTensorDynamicRange(const INetworkDefinition& network,
+                           float inRange = 2.0F, float outRange = 4.0F) {
+  // Ensure that all layer inputs have a dynamic range.
+  for (int l = 0; l < network.getNbLayers(); l++) {
+    auto* layer = network.getLayer(l);
+    for (int i = 0; i < layer->getNbInputs(); i++) {
+      ITensor* input{layer->getInput(i)};
+      // Optional inputs are nullptr here and are from RNN layers.
+      if (input && !input->dynamicRangeIsSet()) {
+        // Concat should propagate dynamic range from outputs to inputs to avoid
+        // Re-quantization during the concatenation
+        auto dynRange = (layer->getType() == LayerType::kCONCATENATION)
+                            ? outRange
+                            : inRange;
+        if (!input->setDynamicRange(-dynRange, dynRange)) {
+          return false;
+        }
+      }
+    }
+    for (int o = 0; o < layer->getNbOutputs(); o++) {
+      ITensor* output{layer->getOutput(o)};
+      // Optional outputs are nullptr here and are from RNN layers.
+      if (output && !output->dynamicRangeIsSet()) {
+        // Pooling must have the same input and output dynamic range.
+        if (layer->getType() == LayerType::kPOOLING) {
+          if (!output->setDynamicRange(-inRange, inRange)) {
+            return false;
+          }
+        } else {
+          if (!output->setDynamicRange(-outRange, outRange)) {
+            return false;
+          }
+        }
+      }
+    }
+  }
+  return true;
+}
+
+// Walk the weights elements and overwrite (at most) 2 out of 4 elements to 0.
+template <typename T>
+void sparsify(const T* values, int64_t count, int32_t k, int32_t rs,
+              std::vector<char>& sparseWeights) {
+  const auto c = count / (k * rs);
+  sparseWeights.resize(count * sizeof(T));
+  auto* sparseValues = reinterpret_cast<T*>(sparseWeights.data());
+
+  constexpr int32_t window = 4;
+  constexpr int32_t nonzeros = 2;
+
+  const int32_t crs = c * rs;
+  const auto getIndex = [=](int32_t ki, int32_t ci, int32_t rsi) {
+    return ki * crs + ci * rs + rsi;
+  };
+
+  for (int64_t ki = 0; ki < k; ++ki) {
+    for (int64_t rsi = 0; rsi < rs; ++rsi) {
+      int32_t w = 0;
+      int32_t nz = 0;
+      for (int64_t ci = 0; ci < c; ++ci) {
+        const auto index = getIndex(ki, ci, rsi);
+        if (nz < nonzeros) {
+          sparseValues[index] = values[index];
+          ++nz;
+        } else {
+          sparseValues[index] = 0;
+        }
+        if (++w == window) {
+          w = 0;
+          nz = 0;
+        }
+      }
+    }
+  }
+}
+
+void sparsify(const Weights& weights, int32_t k, int32_t rs,
+              std::vector<char>& sparseWeights) {
+  switch (weights.type) {
+  case DataType::kFLOAT:
+    sparsify(static_cast<const float*>(weights.values), weights.count, k, rs,
+             sparseWeights);
+    break;
+  case DataType::kHALF:
+    sparsify(static_cast<const half_float::half*>(weights.values),
+             weights.count, k, rs, sparseWeights);
+    break;
+  case DataType::kINT8:
+  case DataType::kINT32:
+  case DataType::kBOOL:
+    break;
+  }
+}
+
+template <typename L>
+void setSparseWeights(L& l, int32_t k, int32_t rs,
+                      std::vector<char>& sparseWeights) {
+  auto weights = l.getKernelWeights();
+  sparsify(weights, k, rs, sparseWeights);
+  weights.values = sparseWeights.data();
+  l.setKernelWeights(weights);
+}
+
+template <typename T>
+void transpose2DWeights(void* dst, void const* src, int32_t const m,
+                        int32_t const n) {
+  ASSERT(dst != src);
+  T* tdst = reinterpret_cast<T*>(dst);
+  T const* tsrc = reinterpret_cast<T const*>(src);
+  for (int32_t mi = 0; mi < m; ++mi) {
+    for (int32_t ni = 0; ni < n; ++ni) {
+      int32_t const isrc = mi * n + ni;
+      int32_t const idst = ni * m + mi;
+      tdst[idst] = tsrc[isrc];
+    }
+  }
+}
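+
+// Illustration of the 2:4 pattern enforced by sparsify() above: along the C
+// dimension, each window of four elements keeps its first two values and
+// zeroes the rest, e.g. for one (k, rs) fiber:
+//
+//   values:       a b c d | e f g h
+//   sparseValues: a b 0 0 | e f 0 0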
+void sparsifyMatMulKernelWeights( + INetworkDefinition& network, + std::vector>& sparseWeights) { + using TensorToLayer = std::unordered_map; + using LayerToTensor = std::unordered_map; + + // 1. Collect layers and tensors information from the network. + TensorToLayer matmulI2L; + TensorToLayer constO2L; + TensorToLayer shuffleI2L; + LayerToTensor shuffleL2O; + auto collectMappingInfo = [&](int32_t const idx) { + ILayer* l = network.getLayer(idx); + switch (l->getType()) { + case LayerType::kMATRIX_MULTIPLY: { + // assume weights on the second input. + matmulI2L.insert({l->getInput(1), l}); + break; + } + case LayerType::kCONSTANT: { + DataType const dtype = static_cast(l)->getWeights().type; + if (dtype == DataType::kFLOAT || dtype == DataType::kHALF) { + // Sparsify float only. + constO2L.insert({l->getOutput(0), l}); + } + break; + } + case LayerType::kSHUFFLE: { + shuffleI2L.insert({l->getInput(0), l}); + shuffleL2O.insert({l, l->getOutput(0)}); + break; + } + default: + break; + } + }; + int32_t const nbLayers = network.getNbLayers(); + for (int32_t i = 0; i < nbLayers; ++i) { + collectMappingInfo(i); + } + if (matmulI2L.size() == 0 || constO2L.size() == 0) { + // No MatrixMultiply or Constant layer found, no weights to sparsify. + return; + } + + // Helper for analysis + auto isTranspose = [](Permutation const& perm) -> bool { + return (perm.order[0] == 1 && perm.order[1] == 0); + }; + auto is2D = [](Dims const& dims) -> bool { return dims.nbDims == 2; }; + auto isIdenticalReshape = [](Dims const& dims) -> bool { + for (int32_t i = 0; i < dims.nbDims; ++i) { + if (dims.d[i] != i || dims.d[i] != -1) { + return false; + } + } + return true; + }; + auto tensorReachedViaTranspose = [&](ITensor* t, + bool& needTranspose) -> ITensor* { + while (shuffleI2L.find(t) != shuffleI2L.end()) { + IShuffleLayer* s = static_cast(shuffleI2L.at(t)); + if (!is2D(s->getInput(0)->getDimensions()) || + !is2D(s->getReshapeDimensions()) || + !isIdenticalReshape(s->getReshapeDimensions())) { + break; + } + + if (isTranspose(s->getFirstTranspose())) { + needTranspose = !needTranspose; + } + if (isTranspose(s->getSecondTranspose())) { + needTranspose = !needTranspose; + } + + t = shuffleL2O.at(s); + } + return t; + }; + + // 2. Forward analysis to collect the Constant layers connected to MatMul via + // Transpose + std::unordered_map constantLayerToSparse; + for (auto& o2l : constO2L) { + // If need to transpose the weights of the Constant layer. + // Need to transpose by default due to semantic difference. + bool needTranspose{true}; + ITensor* t = tensorReachedViaTranspose(o2l.first, needTranspose); + if (matmulI2L.find(t) == matmulI2L.end()) { + continue; + } + + // check MatMul params... + IMatrixMultiplyLayer* mm = + static_cast(matmulI2L.at(t)); + bool const twoInputs = mm->getNbInputs() == 2; + bool const all2D = is2D(mm->getInput(0)->getDimensions()) && + is2D(mm->getInput(1)->getDimensions()); + bool const isSimple = mm->getOperation(0) == MatrixOperation::kNONE && + mm->getOperation(1) != MatrixOperation::kVECTOR; + if (!(twoInputs && all2D && isSimple)) { + continue; + } + if (mm->getOperation(1) == MatrixOperation::kTRANSPOSE) { + needTranspose = !needTranspose; + } + + constantLayerToSparse.insert( + {static_cast(o2l.second), needTranspose}); + } + + // 3. 
Finally, sparsify the weights + auto sparsifyConstantWeights = [&sparseWeights](IConstantLayer* layer, + bool const needTranspose) { + Dims dims = layer->getOutput(0)->getDimensions(); + ASSERT(dims.nbDims == 2); + int32_t const idxN = needTranspose ? 1 : 0; + int32_t const n = dims.d[idxN]; + int32_t const k = dims.d[1 - idxN]; + sparseWeights.emplace_back(); + std::vector& spw = sparseWeights.back(); + Weights w = layer->getWeights(); + DataType const dtype = w.type; + ASSERT(dtype == DataType::kFLOAT || + dtype == + DataType::kHALF); // non-float weights should have been ignored. + + if (needTranspose) { + if (dtype == DataType::kFLOAT) { + spw.resize(w.count * sizeof(float)); + transpose2DWeights(spw.data(), w.values, k, n); + } else if (dtype == DataType::kHALF) { + spw.resize(w.count * sizeof(half_float::half)); + transpose2DWeights(spw.data(), w.values, k, n); + } + + w.values = spw.data(); + std::vector tmpW; + sparsify(w, n, 1, tmpW); + + if (dtype == DataType::kFLOAT) { + transpose2DWeights(spw.data(), tmpW.data(), n, k); + } else if (dtype == DataType::kHALF) { + transpose2DWeights(spw.data(), tmpW.data(), n, k); + } + } else { + sparsify(w, n, 1, spw); + } + + w.values = spw.data(); + layer->setWeights(w); + }; + for (auto& l : constantLayerToSparse) { + sparsifyConstantWeights(l.first, l.second); + } +} + +void sparsify(INetworkDefinition& network, + std::vector>& sparseWeights) { + for (int32_t l = 0; l < network.getNbLayers(); ++l) { + auto* layer = network.getLayer(l); + const auto t = layer->getType(); + if (t == LayerType::kCONVOLUTION) { + auto& conv = *static_cast(layer); + const auto& dims = conv.getKernelSizeNd(); + if (dims.nbDims > 2) { + continue; + } + const auto k = conv.getNbOutputMaps(); + const auto rs = dims.d[0] * dims.d[1]; + sparseWeights.emplace_back(); + setSparseWeights(conv, k, rs, sparseWeights.back()); + } else if (t == LayerType::kFULLY_CONNECTED) { + auto& fc = *static_cast(layer); + const auto k = fc.getNbOutputChannels(); + sparseWeights.emplace_back(); + setSparseWeights(fc, k, 1, sparseWeights.back()); + } + } + + sparsifyMatMulKernelWeights(network, sparseWeights); +} + +void setLayerPrecisions(INetworkDefinition& network, + LayerPrecisions const& layerPrecisions) { + bool const hasGlobalPrecision{layerPrecisions.find("*") != + layerPrecisions.end()}; + auto const globalPrecision = + hasGlobalPrecision ? layerPrecisions.at("*") : nvinfer1::DataType::kFLOAT; + bool hasLayerPrecisionSkipped{false}; + for (int32_t layerIdx = 0; layerIdx < network.getNbLayers(); ++layerIdx) { + auto* layer = network.getLayer(layerIdx); + auto const layerName = layer->getName(); + if (layerPrecisions.find(layer->getName()) != layerPrecisions.end()) { + layer->setPrecision(layerPrecisions.at(layer->getName())); + } else if (hasGlobalPrecision) { + // We should not set the layer precision if its default precision is INT32 + // or Bool. + if (layer->getPrecision() == nvinfer1::DataType::kINT32 || + layer->getPrecision() == nvinfer1::DataType::kBOOL) { + hasLayerPrecisionSkipped = true; + sample::gLogVerbose << "Skipped setting precision for layer " + << layerName << " because the " + << " default layer precision is INT32 or Bool." + << std::endl; + continue; + } + // We should not set the constant layer precision if its weights are in + // INT32. 
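+      // (Such INT32 constants typically hold shape or index data; casting
+      // them to a float precision is not supported, so they are left at
+      // their default precision.)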
+ if (layer->getType() == nvinfer1::LayerType::kCONSTANT && + static_cast(layer)->getWeights().type == + nvinfer1::DataType::kINT32) { + hasLayerPrecisionSkipped = true; + sample::gLogVerbose << "Skipped setting precision for layer " + << layerName << " because this " + << "constant layer has INT32 weights." << std::endl; + continue; + } + // We should not set the layer precision if the layer operates on a shape + // tensor. + if (layer->getNbInputs() >= 1 && layer->getInput(0)->isShapeTensor()) { + hasLayerPrecisionSkipped = true; + sample::gLogVerbose << "Skipped setting precision for layer " + << layerName << " because this layer " + << "operates on a shape tensor." << std::endl; + continue; + } + if ((layer->getType() == nvinfer1::LayerType::kIDENTITY || + layer->getType() == nvinfer1::LayerType::kSHUFFLE) && + layer->getNbInputs() >= 1 && + layer->getInput(0)->getType() == nvinfer1::DataType::kINT32 && + layer->getNbOutputs() >= 1 && + layer->getOutput(0)->getType() == nvinfer1::DataType::kINT32) { + hasLayerPrecisionSkipped = true; + sample::gLogVerbose << "Skipped setting precision for layer " + << layerName << " because this " + << "layer has INT32 input and output." << std::endl; + continue; + } + // All heuristics passed. Set the layer precision. + layer->setPrecision(globalPrecision); + } + } + + if (hasLayerPrecisionSkipped) { + sample::gLogInfo << "Skipped setting precisions for some layers. Check " + "verbose logs for more details." + << std::endl; + } +} + +void setLayerOutputTypes(INetworkDefinition& network, + LayerOutputTypes const& layerOutputTypes) { + bool const hasGlobalOutputType{layerOutputTypes.find("*") != + layerOutputTypes.end()}; + auto const globalOutputType = hasGlobalOutputType + ? layerOutputTypes.at("*").at(0) + : nvinfer1::DataType::kFLOAT; + bool hasLayerOutputTypeSkipped{false}; + for (int32_t layerIdx = 0; layerIdx < network.getNbLayers(); ++layerIdx) { + auto* layer = network.getLayer(layerIdx); + auto const layerName = layer->getName(); + auto const nbOutputs = layer->getNbOutputs(); + if (layerOutputTypes.find(layer->getName()) != layerOutputTypes.end()) { + auto const& outputTypes = layerOutputTypes.at(layer->getName()); + bool const isBroadcast = (outputTypes.size() == 1); + if (!isBroadcast && + static_cast(outputTypes.size()) != nbOutputs) { + sample::gLogError + << "Layer " << layerName << " has " << nbOutputs << " outputs but " + << outputTypes.size() + << " output types are given in --layerOutputTypes flag." + << std::endl; + throw std::invalid_argument("Invalid --layerOutputTypes flag."); + } + for (int32_t outputIdx = 0; outputIdx < nbOutputs; ++outputIdx) { + layer->setOutputType(outputIdx, + outputTypes.at(isBroadcast ? 0 : outputIdx)); + } + } else if (hasGlobalOutputType) { + // We should not set the layer output types if its default precision is + // INT32 or Bool. + if (layer->getPrecision() == nvinfer1::DataType::kINT32 || + layer->getPrecision() == nvinfer1::DataType::kBOOL) { + hasLayerOutputTypeSkipped = true; + sample::gLogVerbose << "Skipped setting output types for layer " + << layerName << " because the " + << " default layer precision is INT32 or Bool." + << std::endl; + continue; + } + // We should not set the constant layer output types if its weights are in + // INT32. 
+ if (layer->getType() == nvinfer1::LayerType::kCONSTANT && + static_cast(layer)->getWeights().type == + nvinfer1::DataType::kINT32) { + hasLayerOutputTypeSkipped = true; + sample::gLogVerbose << "Skipped setting output types for layer " + << layerName << " because this " + << "constant layer has INT32 weights." << std::endl; + continue; + } + for (int32_t outputIdx = 0; outputIdx < nbOutputs; ++outputIdx) { + // We should not set the output type if the output is a shape tensor. + if (layer->getOutput(0)->isShapeTensor()) { + hasLayerOutputTypeSkipped = true; + sample::gLogVerbose << "Skipped setting output type for output " + << outputIdx << " of layer " << layerName + << " because it is a shape tensor." << std::endl; + continue; + } + layer->setOutputType(outputIdx, globalOutputType); + } + } + } + + if (hasLayerOutputTypeSkipped) { + sample::gLogInfo << "Skipped setting output types for some layers. Check " + "verbose logs for more details." + << std::endl; + } +} + +void setMemoryPoolLimits(IBuilderConfig& config, BuildOptions const& build) { + auto const roundToBytes = [](double const sizeInMB) { + return static_cast(sizeInMB * (1 << 20)); + }; + if (build.workspace >= 0) { + config.setMemoryPoolLimit(MemoryPoolType::kWORKSPACE, + roundToBytes(build.workspace)); + } + if (build.dlaSRAM >= 0) { + config.setMemoryPoolLimit(MemoryPoolType::kDLA_MANAGED_SRAM, + roundToBytes(build.dlaSRAM)); + } + if (build.dlaLocalDRAM >= 0) { + config.setMemoryPoolLimit(MemoryPoolType::kDLA_LOCAL_DRAM, + roundToBytes(build.dlaLocalDRAM)); + } + if (build.dlaGlobalDRAM >= 0) { + config.setMemoryPoolLimit(MemoryPoolType::kDLA_GLOBAL_DRAM, + roundToBytes(build.dlaGlobalDRAM)); + } +} + +} // namespace + +bool setupNetworkAndConfig(const BuildOptions& build, const SystemOptions& sys, + IBuilder& builder, INetworkDefinition& network, + IBuilderConfig& config, std::ostream& err, + std::vector>& sparseWeights) { + IOptimizationProfile* profile{nullptr}; + if (build.maxBatch) { + builder.setMaxBatchSize(build.maxBatch); + } else { + profile = builder.createOptimizationProfile(); + } + + bool hasDynamicShapes{false}; + + bool broadcastInputFormats = + broadcastIOFormats(build.inputFormats, network.getNbInputs()); + + if (profile) { + // Check if the provided input tensor names match the input tensors of the + // engine. + // Throw an error if the provided input tensor names cannot be found because + // it implies a potential typo. + for (const auto& shape : build.shapes) { + bool tensorNameFound{false}; + for (int32_t i = 0; i < network.getNbInputs(); ++i) { + if (network.getInput(i)->getName() == shape.first) { + tensorNameFound = true; + break; + } + } + if (!tensorNameFound) { + sample::gLogError + << "Cannot find input tensor with name \"" << shape.first + << "\" in the network " + << "inputs! Please make sure the input tensor names are correct." + << std::endl; + return false; + } + } + } + + for (uint32_t i = 0, n = network.getNbInputs(); i < n; i++) { + // Set formats and data types of inputs + auto* input = network.getInput(i); + if (!build.inputFormats.empty()) { + int inputFormatIndex = broadcastInputFormats ? 0 : i; + input->setType(build.inputFormats[inputFormatIndex].first); + input->setAllowedFormats(build.inputFormats[inputFormatIndex].second); + } else { + switch (input->getType()) { + case DataType::kINT32: + case DataType::kBOOL: + case DataType::kHALF: + // Leave these as is. + break; + case DataType::kFLOAT: + case DataType::kINT8: + // User did not specify a floating-point format. 
Default to kFLOAT. + input->setType(DataType::kFLOAT); + break; + } + input->setAllowedFormats(1U << static_cast(TensorFormat::kLINEAR)); + } + + if (profile) { + auto const dims = input->getDimensions(); + auto const isScalar = dims.nbDims == 0; + auto const isDynamicInput = + std::any_of(dims.d, dims.d + dims.nbDims, + [](int32_t dim) { return dim == -1; }) || + input->isShapeTensor(); + if (isDynamicInput) { + hasDynamicShapes = true; + auto shape = build.shapes.find(input->getName()); + ShapeRange shapes{}; + + // If no shape is provided, set dynamic dimensions to 1. + if (shape == build.shapes.end()) { + constexpr int DEFAULT_DIMENSION = 1; + std::vector staticDims; + if (input->isShapeTensor()) { + if (isScalar) { + staticDims.push_back(1); + } else { + staticDims.resize(dims.d[0]); + std::fill(staticDims.begin(), staticDims.end(), + DEFAULT_DIMENSION); + } + } else { + staticDims.resize(dims.nbDims); + std::transform(dims.d, dims.d + dims.nbDims, staticDims.begin(), + [&](int dimension) { + return dimension > 0 ? dimension + : DEFAULT_DIMENSION; + }); + } + sample::gLogWarning + << "Dynamic dimensions required for input: " << input->getName() + << ", but no shapes were provided. Automatically overriding " + "shape to: " + << staticDims << std::endl; + std::fill(shapes.begin(), shapes.end(), staticDims); + } else { + shapes = shape->second; + } + + std::vector profileDims{}; + if (input->isShapeTensor()) { + profileDims = shapes[static_cast(OptProfileSelector::kMIN)]; + SMP_RETVAL_IF_FALSE(profile->setShapeValues( + input->getName(), OptProfileSelector::kMIN, + profileDims.data(), + static_cast(profileDims.size())), + "Error in set shape values MIN", false, err); + profileDims = shapes[static_cast(OptProfileSelector::kOPT)]; + SMP_RETVAL_IF_FALSE(profile->setShapeValues( + input->getName(), OptProfileSelector::kOPT, + profileDims.data(), + static_cast(profileDims.size())), + "Error in set shape values OPT", false, err); + profileDims = shapes[static_cast(OptProfileSelector::kMAX)]; + SMP_RETVAL_IF_FALSE(profile->setShapeValues( + input->getName(), OptProfileSelector::kMAX, + profileDims.data(), + static_cast(profileDims.size())), + "Error in set shape values MAX", false, err); + } else { + profileDims = shapes[static_cast(OptProfileSelector::kMIN)]; + SMP_RETVAL_IF_FALSE( + profile->setDimensions(input->getName(), OptProfileSelector::kMIN, + toDims(profileDims)), + "Error in set dimensions to profile MIN", false, err); + profileDims = shapes[static_cast(OptProfileSelector::kOPT)]; + SMP_RETVAL_IF_FALSE( + profile->setDimensions(input->getName(), OptProfileSelector::kOPT, + toDims(profileDims)), + "Error in set dimensions to profile OPT", false, err); + profileDims = shapes[static_cast(OptProfileSelector::kMAX)]; + SMP_RETVAL_IF_FALSE( + profile->setDimensions(input->getName(), OptProfileSelector::kMAX, + toDims(profileDims)), + "Error in set dimensions to profile MAX", false, err); + } + } + } + } + + if (!hasDynamicShapes && !build.shapes.empty()) { + sample::gLogError << "Static model does not take explicit shapes since the " + "shape of inference tensors will be " + "determined by the model itself" + << std::endl; + return false; + } + + if (profile && hasDynamicShapes) { + SMP_RETVAL_IF_FALSE(profile->isValid(), + "Required optimization profile is invalid", false, err); + SMP_RETVAL_IF_FALSE(config.addOptimizationProfile(profile) != -1, + "Error in add optimization profile", false, err); + } + + bool broadcastOutputFormats = + broadcastIOFormats(build.outputFormats, 
network.getNbOutputs(), false); + + for (uint32_t i = 0, n = network.getNbOutputs(); i < n; i++) { + // Set formats and data types of outputs + auto* output = network.getOutput(i); + if (!build.outputFormats.empty()) { + int outputFormatIndex = broadcastOutputFormats ? 0 : i; + output->setType(build.outputFormats[outputFormatIndex].first); + output->setAllowedFormats(build.outputFormats[outputFormatIndex].second); + } else { + output->setAllowedFormats(1U << static_cast(TensorFormat::kLINEAR)); + } + } + + setMemoryPoolLimits(config, build); + + if (build.timingCacheMode == TimingCacheMode::kDISABLE) { + config.setFlag(BuilderFlag::kDISABLE_TIMING_CACHE); + } + + if (!build.tf32) { + config.clearFlag(BuilderFlag::kTF32); + } + + if (build.refittable) { + config.setFlag(BuilderFlag::kREFIT); + } + + if (build.sparsity != SparsityFlag::kDISABLE) { + config.setFlag(BuilderFlag::kSPARSE_WEIGHTS); + if (build.sparsity == SparsityFlag::kFORCE) { + sparsify(network, sparseWeights); + } + } + + config.setProfilingVerbosity(build.profilingVerbosity); + config.setMinTimingIterations(build.minTiming); + config.setAvgTimingIterations(build.avgTiming); + + if (build.fp16) { + config.setFlag(BuilderFlag::kFP16); + } + + if (build.int8) { + config.setFlag(BuilderFlag::kINT8); + } + + if (build.int8 && !build.fp16) { + sample::gLogInfo << "FP32 and INT8 precisions have been specified - more " + "performance might be enabled by additionally " + "specifying --fp16 or --best" + << std::endl; + } + + auto isInt8 = [](const IOFormat& format) { + return format.first == DataType::kINT8; + }; + auto int8IO = std::count_if(build.inputFormats.begin(), + build.inputFormats.end(), isInt8) + + std::count_if(build.outputFormats.begin(), + build.outputFormats.end(), isInt8); + + auto hasQDQLayers = [](INetworkDefinition& network) { + // Determine if our network has QDQ layers. + const auto nbLayers = network.getNbLayers(); + for (int32_t i = 0; i < nbLayers; i++) { + const auto& layer = network.getLayer(i); + if (layer->getType() == LayerType::kQUANTIZE || + layer->getType() == LayerType::kDEQUANTIZE) { + return true; + } + } + return false; + }; + + if (!hasQDQLayers(network) && (build.int8 || int8IO) && + build.calibration.empty()) { + // Explicitly set int8 scales if no calibrator is provided and if I/O + // tensors use int8, + // because auto calibration does not support this case. 
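+    // (Sketch of when this path triggers, under hypothetical flags: an int8
+    // build with no calibration cache supplied. Every tensor then receives
+    // the fixed inRange=2 / outRange=4 values from setTensorDynamicRange,
+    // which yields a buildable engine with meaningless accuracy, i.e. one
+    // that is useful for performance measurement only.)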
+ SMP_RETVAL_IF_FALSE(setTensorDynamicRange(network), + "Error in set tensor dynamic range.", false, err); + } else if (build.int8) { + if (!hasQDQLayers(network) && int8IO) { + try { + // Set dynamic ranges of int8 inputs / outputs to match scales loaded + // from calibration cache + // TODO http://nvbugs/3262234 Change the network validation so that this + // workaround can be removed + setTensorScalesFromCalibration(network, build.inputFormats, + build.outputFormats, build.calibration); + } catch (std::exception&) { + sample::gLogError << "Int8IO was specified but impossible to read " + "tensor scales from provided calibration cache " + "file" + << std::endl; + return false; + } + } + IOptimizationProfile* profileCalib{nullptr}; + if (!build.shapesCalib.empty()) { + profileCalib = builder.createOptimizationProfile(); + for (uint32_t i = 0, n = network.getNbInputs(); i < n; i++) { + auto* input = network.getInput(i); + Dims profileDims{}; + auto shape = build.shapesCalib.find(input->getName()); + ShapeRange shapesCalib{}; + shapesCalib = shape->second; + + profileDims = + toDims(shapesCalib[static_cast(OptProfileSelector::kOPT)]); + // Here we check only kMIN as all profileDims are the same. + SMP_RETVAL_IF_FALSE( + profileCalib->setDimensions(input->getName(), + OptProfileSelector::kMIN, profileDims), + "Error in set dimensions to calibration profile OPT", false, err); + profileCalib->setDimensions(input->getName(), OptProfileSelector::kOPT, + profileDims); + profileCalib->setDimensions(input->getName(), OptProfileSelector::kMAX, + profileDims); + } + SMP_RETVAL_IF_FALSE(profileCalib->isValid(), + "Calibration profile is invalid", false, err); + SMP_RETVAL_IF_FALSE(config.setCalibrationProfile(profileCalib), + "Error in set calibration profile", false, err); + } + + std::vector elemCount{}; + for (int i = 0; i < network.getNbInputs(); i++) { + auto* input = network.getInput(i); + auto const dims = input->getDimensions(); + auto const isDynamicInput = std::any_of( + dims.d, dims.d + dims.nbDims, [](int32_t dim) { return dim == -1; }); + + if (profileCalib) { + elemCount.push_back(volume(profileCalib->getDimensions( + input->getName(), OptProfileSelector::kOPT))); + } else if (profile && isDynamicInput) { + elemCount.push_back(volume(profile->getDimensions( + input->getName(), OptProfileSelector::kOPT))); + } else { + elemCount.push_back(volume(input->getDimensions())); + } + } + + config.setInt8Calibrator( + new RndInt8Calibrator(1, elemCount, build.calibration, network, err)); + } + + if (build.directIO) { + config.setFlag(BuilderFlag::kDIRECT_IO); + } + + switch (build.precisionConstraints) { + case PrecisionConstraints::kNONE: + // It's the default for TensorRT. + break; + case PrecisionConstraints::kOBEY: + config.setFlag(BuilderFlag::kOBEY_PRECISION_CONSTRAINTS); + break; + case PrecisionConstraints::kPREFER: + config.setFlag(BuilderFlag::kPREFER_PRECISION_CONSTRAINTS); + break; + } + + if (!build.layerPrecisions.empty() && + build.precisionConstraints != PrecisionConstraints::kNONE) { + setLayerPrecisions(network, build.layerPrecisions); + } + + if (!build.layerOutputTypes.empty() && + build.precisionConstraints != PrecisionConstraints::kNONE) { + setLayerOutputTypes(network, build.layerOutputTypes); + } + + if (build.safe) { + config.setEngineCapability(sys.DLACore != -1 + ? 
EngineCapability::kDLA_STANDALONE
+                                  : EngineCapability::kSAFETY);
+  }
+
+  if (build.restricted) {
+    config.setFlag(BuilderFlag::kSAFETY_SCOPE);
+  }
+
+  if (sys.DLACore != -1) {
+    if (sys.DLACore < builder.getNbDLACores()) {
+      config.setDefaultDeviceType(DeviceType::kDLA);
+      config.setDLACore(sys.DLACore);
+      config.setFlag(BuilderFlag::kPREFER_PRECISION_CONSTRAINTS);
+
+      if (sys.fallback) {
+        config.setFlag(BuilderFlag::kGPU_FALLBACK);
+      } else {
+        // Reformatting runs on GPU, so avoid I/O reformatting.
+        config.setFlag(BuilderFlag::kDIRECT_IO);
+      }
+      if (!build.int8) {
+        config.setFlag(BuilderFlag::kFP16);
+      }
+    } else {
+      err << "Cannot create DLA engine, " << sys.DLACore << " not available"
+          << std::endl;
+      return false;
+    }
+  }
+
+  if (build.enabledTactics || build.disabledTactics) {
+    TacticSources tacticSources = config.getTacticSources();
+    tacticSources |= build.enabledTactics;
+    tacticSources &= ~build.disabledTactics;
+    config.setTacticSources(tacticSources);
+  }
+
+  return true;
+}
+
+//!
+//! \brief Build an engine for a network definition
+//!
+//! \return true if the engine was built and stored in the environment,
+//! false otherwise
+//!
+bool networkToEngine(const BuildOptions& build, const SystemOptions& sys,
+                     IBuilder& builder, BuildEnvironment& env,
+                     std::ostream& err) {
+  TrtUniquePtr<IBuilderConfig> config{builder.createBuilderConfig()};
+  std::vector<std::vector<int8_t>> sparseWeights;
+  SMP_RETVAL_IF_FALSE(config != nullptr, "Config creation failed", false, err);
+  SMP_RETVAL_IF_FALSE(setupNetworkAndConfig(build, sys, builder, *env.network,
+                                            *config, err, sparseWeights),
+                      "Network And Config setup failed", false, err);
+
+  std::unique_ptr<ITimingCache> timingCache{nullptr};
+  // Try to load the cache from file; create a fresh cache if the file
+  // doesn't exist.
+  if (build.timingCacheMode == TimingCacheMode::kGLOBAL) {
+    std::vector<char> loadedCache = loadTimingCacheFile(build.timingCacheFile);
+    timingCache.reset(config->createTimingCache(
+        static_cast<const void*>(loadedCache.data()), loadedCache.size()));
+    SMP_RETVAL_IF_FALSE(timingCache != nullptr, "TimingCache creation failed",
+                        false, err);
+    config->setTimingCache(*timingCache, false);
+  }
+
+  // CUDA stream used for profiling by the builder.
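+  // (setProfileStream below tells the builder to time candidate kernels on
+  // this dedicated stream rather than the default stream, so other work on
+  // the device is less likely to skew the kernel timing.)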
+ auto profileStream = samplesCommon::makeCudaStream(); + SMP_RETVAL_IF_FALSE(profileStream != nullptr, "Cuda stream creation failed", + false, err); + config->setProfileStream(*profileStream); + + TrtUniquePtr serializedEngine{ + builder.buildSerializedNetwork(*env.network, *config)}; + SMP_RETVAL_IF_FALSE(serializedEngine != nullptr, + "Engine could not be created from network", false, err); + + env.engineBlob.resize(serializedEngine->size()); + std::memcpy(env.engineBlob.data(), serializedEngine->data(), + serializedEngine->size()); + + if (build.safe) { + ASSERT(sample::hasSafeRuntime()); + std::unique_ptr safeRuntime{ + sample::createSafeInferRuntime(sample::gLogger.getTRTLogger())}; + SMP_RETVAL_IF_FALSE(safeRuntime != nullptr, "SafeRuntime creation failed", + false, err); + safeRuntime->setErrorRecorder(&gRecorder); + env.safeEngine.reset(safeRuntime->deserializeCudaEngine( + serializedEngine->data(), serializedEngine->size())); + if (build.consistency) { + checkSafeEngine(serializedEngine->data(), serializedEngine->size()); + } + SMP_RETVAL_IF_FALSE(env.safeEngine != nullptr, + "SafeEngine deserialization failed", false, err); + } else { + TrtUniquePtr runtime{ + createInferRuntime(sample::gLogger.getTRTLogger())}; + SMP_RETVAL_IF_FALSE(runtime != nullptr, "Runtime creation failed", false, + err); + runtime->setErrorRecorder(&gRecorder); + env.engine.reset(runtime->deserializeCudaEngine(serializedEngine->data(), + serializedEngine->size())); + SMP_RETVAL_IF_FALSE(env.engine != nullptr, "Engine deserialization failed", + false, err); + if (build.timingCacheMode == TimingCacheMode::kGLOBAL) { + auto const& timingCache = config->getTimingCache(); + std::unique_ptr timingCacheHostData{ + timingCache->serialize()}; + SMP_RETVAL_IF_FALSE(timingCacheHostData != nullptr, + "Timing Cache serialization failed", false, err); + saveTimingCacheFile(build.timingCacheFile, timingCacheHostData.get()); + } + if (config->getInt8Calibrator()) { + delete config->getInt8Calibrator(); + } + } + return true; +} + +//! +//! \brief Parse a given model, create a network and an engine. +//! +bool modelToBuildEnv(const ModelOptions& model, const BuildOptions& build, + const SystemOptions& sys, BuildEnvironment& env, + std::ostream& err) { + TrtUniquePtr builder{ + createInferBuilder(sample::gLogger.getTRTLogger())}; + SMP_RETVAL_IF_FALSE(builder != nullptr, "Builder creation failed", false, + err); + builder->setErrorRecorder(&gRecorder); + auto networkFlags = + (build.maxBatch) + ? 0U + : 1U << static_cast( + nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH); + + env.network.reset(builder->createNetworkV2(networkFlags)); + SMP_RETVAL_IF_FALSE(env.network != nullptr, "Network creation failed", false, + err); + env.parser = modelToNetwork(model, *env.network, err); + SMP_RETVAL_IF_FALSE(env.parser.operator bool(), "Parsing model failed", false, + err); + SMP_RETVAL_IF_FALSE(networkToEngine(build, sys, *builder, env, err), + "Building engine failed", false, err); + return true; +} + +namespace { +std::pair, std::vector> +getLayerWeightsRolePair(IRefitter& refitter) { + // Get number of refittable items. + auto const nbAll = refitter.getAll(0, nullptr, nullptr); + std::vector layerNames(nbAll); + // Allocate buffers for the items and get them. 
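+  // (getAll follows the common two-call pattern: the first call above with
+  // null buffers only returns the count; this second call fills in the
+  // layer names and weight roles.)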
+ std::vector weightsRoles(nbAll); + refitter.getAll(nbAll, layerNames.data(), weightsRoles.data()); + std::vector layerNameStrs(nbAll); + std::transform(layerNames.begin(), layerNames.end(), layerNameStrs.begin(), + [](char const* name) { + if (name == nullptr) { + return std::string{}; + } + return std::string{name}; + }); + return {layerNameStrs, weightsRoles}; +} + +std::pair, std::vector> +getMissingLayerWeightsRolePair(IRefitter& refitter) { + // Get number of refittable items. + auto const nbMissing = refitter.getMissing(0, nullptr, nullptr); + std::vector layerNames(nbMissing); + // Allocate buffers for the items and get them. + std::vector weightsRoles(nbMissing); + refitter.getMissing(nbMissing, layerNames.data(), weightsRoles.data()); + std::vector layerNameStrs(nbMissing); + std::transform(layerNames.begin(), layerNames.end(), layerNameStrs.begin(), + [](char const* name) { + if (name == nullptr) { + return std::string{}; + } + return std::string{name}; + }); + return {layerNameStrs, weightsRoles}; +} + +bool loadEngineToEnv(const std::string& engine, int DLACore, bool safe, + bool enableConsistency, BuildEnvironment& env, + std::ostream& err) { + std::ifstream engineFile(engine, std::ios::binary); + SMP_RETVAL_IF_FALSE(engineFile.good(), "", false, + err << "Error opening engine file: " << engine); + engineFile.seekg(0, std::ifstream::end); + int64_t fsize = engineFile.tellg(); + engineFile.seekg(0, std::ifstream::beg); + + env.engineBlob.resize(fsize); + engineFile.read(reinterpret_cast(env.engineBlob.data()), fsize); + SMP_RETVAL_IF_FALSE(engineFile.good(), "", false, + err << "Error loading engine file: " << engine); + + if (safe) { + ASSERT(sample::hasSafeRuntime()); + std::unique_ptr safeRuntime{ + sample::createSafeInferRuntime(sample::gLogger.getTRTLogger())}; + safeRuntime->setErrorRecorder(&gRecorder); + env.safeEngine.reset( + safeRuntime->deserializeCudaEngine(env.engineBlob.data(), fsize)); + bool result = env.safeEngine != nullptr; + if (result && enableConsistency) { + checkSafeEngine(env.engineBlob.data(), fsize); + } + return result; + } + + TrtUniquePtr runtime{ + createInferRuntime(sample::gLogger.getTRTLogger())}; + if (DLACore != -1) { + runtime->setDLACore(DLACore); + } + runtime->setErrorRecorder(&gRecorder); + env.engine.reset( + runtime->deserializeCudaEngine(env.engineBlob.data(), fsize, nullptr)); + return env.engine != nullptr; +} +} // namespace + +void dumpRefittable(nvinfer1::ICudaEngine& engine) { + TrtUniquePtr refitter{ + createInferRefitter(engine, sample::gLogger.getTRTLogger())}; + if (refitter == nullptr) { + sample::gLogError << "Failed to create a refitter." << std::endl; + return; + } + + auto const& layerWeightsRolePair = getLayerWeightsRolePair(*refitter); + auto const& layerNames = layerWeightsRolePair.first; + auto const& weightsRoles = layerWeightsRolePair.second; + auto const nbAll = layerWeightsRolePair.first.size(); + for (size_t i = 0; i < nbAll; ++i) { + sample::gLogInfo << layerNames[i] << " " << weightsRoles[i] << std::endl; + } +} + +ICudaEngine* loadEngine(const std::string& engine, int DLACore, + std::ostream& err) { + BuildEnvironment env; + return loadEngineToEnv(engine, DLACore, false, false, env, err) + ? 
env.engine.release() + : nullptr; +} + +bool saveEngine(const ICudaEngine& engine, const std::string& fileName, + std::ostream& err) { + std::ofstream engineFile(fileName, std::ios::binary); + if (!engineFile) { + err << "Cannot open engine file: " << fileName << std::endl; + return false; + } + + TrtUniquePtr serializedEngine{engine.serialize()}; + if (serializedEngine == nullptr) { + err << "Engine serialization failed" << std::endl; + return false; + } + + engineFile.write(static_cast(serializedEngine->data()), + serializedEngine->size()); + return !engineFile.fail(); +} + +bool getEngineBuildEnv(const ModelOptions& model, const BuildOptions& build, + const SystemOptions& sys, BuildEnvironment& env, + std::ostream& err) { + TrtUniquePtr engine; + TrtUniquePtr network; + Parser parser; + + bool createEngineSuccess{false}; + + if (build.load) { + createEngineSuccess = loadEngineToEnv(build.engine, sys.DLACore, build.safe, + build.consistency, env, err); + } else { + createEngineSuccess = modelToBuildEnv(model, build, sys, env, err); + } + + SMP_RETVAL_IF_FALSE(createEngineSuccess, + "Failed to create engine from model.", false, err); + + if (build.save) { + std::ofstream engineFile(build.engine, std::ios::binary); + engineFile.write(reinterpret_cast(env.engineBlob.data()), + env.engineBlob.size()); + SMP_RETVAL_IF_FALSE(!engineFile.fail(), "Saving engine to file failed.", + false, err); + } + return true; +} + +IHostMemory* networkToSerialized(const BuildOptions& build, + const SystemOptions& sys, IBuilder& builder, + INetworkDefinition& network, + std::ostream& err) { + TrtUniquePtr config{builder.createBuilderConfig()}; + std::vector> sparseWeights; + SMP_RETVAL_IF_FALSE(config != nullptr, "Config creation failed", nullptr, + err); + SMP_RETVAL_IF_FALSE(setupNetworkAndConfig(build, sys, builder, network, + *config, err, sparseWeights), + "Network And Config setup failed", nullptr, err); + return builder.buildSerializedNetwork(network, *config); +} + +IHostMemory* modelToSerialized(const ModelOptions& model, + const BuildOptions& build, + const SystemOptions& sys, std::ostream& err) { + TrtUniquePtr builder{ + createInferBuilder(sample::gLogger.getTRTLogger())}; + SMP_RETVAL_IF_FALSE(builder != nullptr, "Builder creation failed", nullptr, + err); + builder->setErrorRecorder(&gRecorder); + + auto networkFlags = + (build.maxBatch) + ? 0U + : 1U << static_cast( + nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH); + + TrtUniquePtr network{ + builder->createNetworkV2(networkFlags)}; + SMP_RETVAL_IF_FALSE(network != nullptr, "Network creation failed", nullptr, + err); + + Parser parser = modelToNetwork(model, *network, err); + SMP_RETVAL_IF_FALSE(parser.operator bool(), "Parsing model failed", nullptr, + err); + + return networkToSerialized(build, sys, *builder, *network, err); +} + +bool serializeAndSave(const ModelOptions& model, const BuildOptions& build, + const SystemOptions& sys, std::ostream& err) { + TrtUniquePtr serialized{ + modelToSerialized(model, build, sys, err)}; + SMP_RETVAL_IF_FALSE(serialized != nullptr, "Network serialization failed", + false, err); + + std::ofstream engineFile(build.engine, std::ios::binary); + SMP_RETVAL_IF_FALSE(!!engineFile, + "Cannot open a file to save a serialize network", false, + err); + engineFile.write(static_cast(serialized->data()), serialized->size()); + return !engineFile.fail(); +} + +// There is not a getWeightsName API, so we need to use WeightsRole. 
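+// (Each layer type exposes a fixed set of roles, e.g. Convolution ->
+// kKERNEL/kBIAS, Scale -> kSCALE/kSHIFT, Constant -> kCONSTANT; layer types
+// with no refittable weights fall through to an empty result below.)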
+std::vector> +getAllRefitWeightsForLayer(const ILayer& l) { + switch (l.getType()) { + case LayerType::kCONSTANT: { + const auto& layer = static_cast(l); + return {std::make_pair(WeightsRole::kCONSTANT, layer.getWeights())}; + } + case LayerType::kCONVOLUTION: { + const auto& layer = static_cast(l); + return {std::make_pair(WeightsRole::kKERNEL, layer.getKernelWeights()), + std::make_pair(WeightsRole::kBIAS, layer.getBiasWeights())}; + } + case LayerType::kDECONVOLUTION: { + const auto& layer = static_cast(l); + return {std::make_pair(WeightsRole::kKERNEL, layer.getKernelWeights()), + std::make_pair(WeightsRole::kBIAS, layer.getBiasWeights())}; + } + case LayerType::kFULLY_CONNECTED: { + const auto& layer = static_cast(l); + return {std::make_pair(WeightsRole::kKERNEL, layer.getKernelWeights()), + std::make_pair(WeightsRole::kBIAS, layer.getBiasWeights())}; + } + case LayerType::kSCALE: { + const auto& layer = static_cast(l); + return {std::make_pair(WeightsRole::kSCALE, layer.getScale()), + std::make_pair(WeightsRole::kSHIFT, layer.getShift())}; + } + case LayerType::kRNN_V2: + case LayerType::kACTIVATION: + case LayerType::kPOOLING: + case LayerType::kLRN: + case LayerType::kSOFTMAX: + case LayerType::kSHUFFLE: + case LayerType::kCONCATENATION: + case LayerType::kELEMENTWISE: + case LayerType::kPLUGIN: + case LayerType::kUNARY: + case LayerType::kPADDING: + case LayerType::kREDUCE: + case LayerType::kTOPK: + case LayerType::kGATHER: + case LayerType::kMATRIX_MULTIPLY: + case LayerType::kRAGGED_SOFTMAX: + case LayerType::kIDENTITY: + case LayerType::kPLUGIN_V2: + case LayerType::kSLICE: + case LayerType::kFILL: + case LayerType::kSHAPE: + case LayerType::kPARAMETRIC_RELU: + case LayerType::kRESIZE: + case LayerType::kTRIP_LIMIT: + case LayerType::kRECURRENCE: + case LayerType::kITERATOR: + case LayerType::kLOOP_OUTPUT: + case LayerType::kSELECT: + case LayerType::kQUANTIZE: + case LayerType::kDEQUANTIZE: + case LayerType::kCONDITION: + case LayerType::kCONDITIONAL_INPUT: + case LayerType::kCONDITIONAL_OUTPUT: + case LayerType::kSCATTER: + case LayerType::kEINSUM: + case LayerType::kASSERTION: + return {}; + } + return {}; +} + +bool timeRefit(INetworkDefinition const& network, nvinfer1::ICudaEngine& engine, + bool multiThreading) { + using time_point = std::chrono::time_point; + using durationMs = std::chrono::duration; + + auto const nbLayers = network.getNbLayers(); + TrtUniquePtr refitter{ + createInferRefitter(engine, sample::gLogger.getTRTLogger())}; + // Set max threads that can be used by refitter. + if (multiThreading && !refitter->setMaxThreads(10)) { + sample::gLogError << "Failed to set max threads to refitter." << std::endl; + return false; + } + auto const& layerWeightsRolePair = getLayerWeightsRolePair(*refitter); + // We use std::string instead of const char* since we can have copies of layer + // names. 
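+  // (A const char* key would compare pointer identity, not characters;
+  // std::string gives the value comparison the set lookup below depends on.)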
+ std::set> layerRoleSet; + + auto const& layerNames = layerWeightsRolePair.first; + auto const& weightsRoles = layerWeightsRolePair.second; + + std::transform(layerNames.begin(), layerNames.end(), weightsRoles.begin(), + std::inserter(layerRoleSet, layerRoleSet.begin()), + [](std::string const& layerName, WeightsRole const role) { + return std::make_pair(layerName, role); + }); + + auto const isRefittable = [&layerRoleSet](char const* layerName, + WeightsRole const role) { + return layerRoleSet.find(std::make_pair(layerName, role)) != + layerRoleSet.end(); + }; + + auto const setWeights = [&] { + for (int32_t i = 0; i < nbLayers; i++) { + auto const layer = network.getLayer(i); + auto const roleWeightsVec = getAllRefitWeightsForLayer(*layer); + for (auto const& roleWeights : roleWeightsVec) { + if (isRefittable(layer->getName(), roleWeights.first)) { + bool const success = refitter->setWeights( + layer->getName(), roleWeights.first, roleWeights.second); + if (!success) { + return false; + } + } + } + } + return true; + }; + + auto const reportMissingWeights = [&] { + auto const& missingPair = getMissingLayerWeightsRolePair(*refitter); + auto const& layerNames = missingPair.first; + auto const& weightsRoles = missingPair.second; + for (size_t i = 0; i < layerNames.size(); ++i) { + sample::gLogError << "Missing (" << layerNames[i] << ", " + << weightsRoles[i] << ") for refitting." << std::endl; + } + return layerNames.empty(); + }; + + // Warm up and report missing weights + bool const success = + setWeights() && reportMissingWeights() && refitter->refitCudaEngine(); + if (!success) { + return false; + } + + constexpr int32_t loop = 10; + time_point const refitStartTime{std::chrono::steady_clock::now()}; + { + for (int32_t l = 0; l < loop; l++) { + bool const success = setWeights() && refitter->refitCudaEngine(); + if (!success) { + return false; + } + } + } + time_point const refitEndTime{std::chrono::steady_clock::now()}; + + sample::gLogInfo << "Engine refitted" + << " in " + << durationMs(refitEndTime - refitStartTime).count() / loop + << " ms." << std::endl; + return true; +} + +namespace { +void* initSafeRuntime() { + void* handle{nullptr}; +#if !defined(_WIN32) + std::string const dllName{samplesCommon::isDebug() + ? "libnvinfer_safe_debug.so.8" + : "libnvinfer_safe.so.8"}; +#if SANITIZER_BUILD + handle = dlopen(dllName.c_str(), RTLD_LAZY | RTLD_NODELETE); +#else + handle = dlopen(dllName.c_str(), RTLD_LAZY); +#endif +#endif + return handle; +} + +void* initConsistencyCheckerLibrary() { + void* handle{nullptr}; +#if !defined(_WIN32) + std::string const dllName{samplesCommon::isDebug() + ? 
"libnvinfer_checker_debug.so.8" + : "libnvinfer_checker.so.8"}; +#if SANITIZER_BUILD + handle = dlopen(dllName.c_str(), RTLD_LAZY | RTLD_NODELETE); +#else + handle = dlopen(dllName.c_str(), RTLD_LAZY); +#endif +#endif + return handle; +} + +#if !defined(_WIN32) +struct DllDeleter { + void operator()(void* handle) { + if (handle != nullptr) { + dlclose(handle); + } + } +}; +const std::unique_ptr safeRuntimeLibrary{initSafeRuntime()}; +const std::unique_ptr consistencyCheckerLibrary{ + initConsistencyCheckerLibrary()}; +#endif +} // namespace + +bool hasSafeRuntime() { + bool ret{false}; +#if !defined(_WIN32) + ret = (safeRuntimeLibrary != nullptr); +#endif + return ret; +} + +nvinfer1::safe::IRuntime* +createSafeInferRuntime(nvinfer1::ILogger& logger) noexcept { + nvinfer1::safe::IRuntime* runtime{nullptr}; +#if !defined(_WIN32) + constexpr char symbolName[] = + "_ZN8nvinfer14safe18createInferRuntimeERNS_7ILoggerE"; + typedef nvinfer1::safe::IRuntime* (*CreateInferRuntimeFn)(nvinfer1::ILogger & + logger); + if (hasSafeRuntime()) { + auto createFn = reinterpret_cast( + dlsym(safeRuntimeLibrary.get(), symbolName)); + if (createFn != nullptr) { + runtime = createFn(logger); + } + } +#endif + return runtime; +} + +bool hasConsistencyChecker() { + bool ret{false}; +#if !defined(_WIN32) + ret = (consistencyCheckerLibrary != nullptr); +#endif + return ret; +} + +nvinfer1::consistency::IConsistencyChecker* +createConsistencyChecker(nvinfer1::ILogger& logger, + void const* serializedEngine, + int32_t const engineSize) noexcept { + nvinfer1::consistency::IConsistencyChecker* checker{nullptr}; + + if (serializedEngine == nullptr || engineSize == 0) { + return checker; + } + +#if !defined(_WIN32) + constexpr char symbolName[] = "createConsistencyChecker_INTERNAL"; + typedef nvinfer1::consistency::IConsistencyChecker* (*CreateCheckerFn)( + nvinfer1::ILogger * logger, void const* data, size_t size, + uint32_t version); + if (hasSafeRuntime()) { + auto createFn = reinterpret_cast( + dlsym(consistencyCheckerLibrary.get(), symbolName)); + if (createFn != nullptr) { + checker = + createFn(&logger, serializedEngine, engineSize, NV_TENSORRT_VERSION); + } + } +#endif + return checker; +} + +bool checkSafeEngine(void const* serializedEngine, int32_t const engineSize) { + if (!hasConsistencyChecker()) { + sample::gLogError << "Cannot perform consistency check because the checker " + "is not loaded.." + << std::endl; + return false; + } + auto checker = std::unique_ptr( + createConsistencyChecker(sample::gLogger.getTRTLogger(), serializedEngine, + engineSize)); + if (checker.get() == nullptr) { + sample::gLogError << "Failed to create consistency checker." << std::endl; + return false; + } + sample::gLogInfo << "Start consistency checking." << std::endl; + if (!checker->validate()) { + sample::gLogError << "Consistency validation failed." << std::endl; + return false; + } + sample::gLogInfo << "Consistency validation passed." << std::endl; + return true; +} +} // namespace sample diff --git a/fastdeploy/backends/tensorrt/common/sampleEngines.h b/fastdeploy/backends/tensorrt/common/sampleEngines.h new file mode 100644 index 0000000000..70e29fcb8a --- /dev/null +++ b/fastdeploy/backends/tensorrt/common/sampleEngines.h @@ -0,0 +1,195 @@ +/* + * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef TRT_SAMPLE_ENGINES_H +#define TRT_SAMPLE_ENGINES_H + +#include +#include + +#include "NvCaffeParser.h" +#include "NvInfer.h" +#include "NvInferConsistency.h" +#include "NvInferSafeRuntime.h" +#include "NvOnnxParser.h" +#include "sampleOptions.h" +#include "sampleUtils.h" + +namespace sample { + +struct Parser { + TrtUniquePtr caffeParser; + TrtUniquePtr onnxParser; + + operator bool() const { return caffeParser || onnxParser; } +}; + +struct BuildEnvironment { + TrtUniquePtr network; + //! Parser that creates the network. Must be declared *after* network, so that + //! when + //! ~BuildEnvironment() executes, the parser is destroyed before the network + //! is destroyed. + Parser parser; + TrtUniquePtr engine; + std::unique_ptr safeEngine; + std::vector engineBlob; +}; + +//! +//! \brief Generate a network definition for a given model +//! +//! \return Parser The parser used to initialize the network and that holds the +//! weights for the network, or an invalid +//! parser (the returned parser converts to false if tested) +//! +//! Constant input dimensions in the model must not be changed in the +//! corresponding +//! network definition, because its correctness may rely on the constants. +//! +//! \see Parser::operator bool() +//! +Parser modelToNetwork(const ModelOptions& model, + nvinfer1::INetworkDefinition& network, std::ostream& err); + +//! +//! \brief Set up network and config +//! +//! \return boolean Return true if network and config were successfully set +//! +bool setupNetworkAndConfig(const BuildOptions& build, const SystemOptions& sys, + IBuilder& builder, INetworkDefinition& network, + IBuilderConfig& config, std::ostream& err, + std::vector>& sparseWeights); + +//! +//! \brief Log refittable layers and weights of a refittable engine +//! +void dumpRefittable(nvinfer1::ICudaEngine& engine); + +//! +//! \brief Load a serialized engine +//! +//! \return Pointer to the engine loaded or nullptr if the operation failed +//! +nvinfer1::ICudaEngine* loadEngine(const std::string& engine, int DLACore, + std::ostream& err); + +//! +//! \brief Save an engine into a file +//! +//! \return boolean Return true if the engine was successfully saved +//! +bool saveEngine(const nvinfer1::ICudaEngine& engine, + const std::string& fileName, std::ostream& err); + +//! +//! \brief Create an engine from model or serialized file, and optionally save +//! engine +//! +//! \return Pointer to the engine created or nullptr if the creation failed +//! +bool getEngineBuildEnv(const ModelOptions& model, const BuildOptions& build, + const SystemOptions& sys, BuildEnvironment& env, + std::ostream& err); + +//! +//! \brief Create an engine from model or serialized file, and optionally save +//! engine +//! +//! \return Pointer to the engine created or nullptr if the creation failed +//! 
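+//! A minimal usage sketch (the option structs are assumed to be filled in
+//! by the caller, e.g. from parsed command-line flags):
+//! \code
+//!   ModelOptions model;  BuildOptions build;  SystemOptions sys;
+//!   auto engine = getEngine(model, build, sys, std::cerr);
+//!   if (engine) { /* run inference */ }
+//! \endcode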
+inline TrtUniquePtr<nvinfer1::ICudaEngine> getEngine(const ModelOptions& model,
+                                                     const BuildOptions& build,
+                                                     const SystemOptions& sys,
+                                                     std::ostream& err) {
+  BuildEnvironment env;
+  TrtUniquePtr<nvinfer1::ICudaEngine> engine;
+  if (getEngineBuildEnv(model, build, sys, env, err)) {
+    engine.swap(env.engine);
+  }
+  return engine;
+}
+
+//!
+//! \brief Create a serialized network
+//!
+//! \return Pointer to a host memory for a serialized network
+//!
+IHostMemory* networkToSerialized(const BuildOptions& build,
+                                 const SystemOptions& sys, IBuilder& builder,
+                                 INetworkDefinition& network,
+                                 std::ostream& err);
+
+//!
+//! \brief Transfer a model to a serialized network
+//!
+//! \return Pointer to a host memory for a serialized network
+//!
+IHostMemory* modelToSerialized(const ModelOptions& model,
+                               const BuildOptions& build,
+                               const SystemOptions& sys, std::ostream& err);
+
+//!
+//! \brief Serialize a network and save it into a file
+//!
+//! \return boolean Return true if the network was successfully serialized
+//! and saved
+//!
+bool serializeAndSave(const ModelOptions& model, const BuildOptions& build,
+                      const SystemOptions& sys, std::ostream& err);
+
+bool timeRefit(const INetworkDefinition& network,
+               nvinfer1::ICudaEngine& engine, bool multiThreading);
+
+//!
+//! \brief Set tensor scales from a calibration table
+//!
+void setTensorScalesFromCalibration(nvinfer1::INetworkDefinition& network,
+                                    const std::vector<IOFormat>& inputFormats,
+                                    const std::vector<IOFormat>& outputFormats,
+                                    const std::string& calibrationFile);
+
+//!
+//! \brief Check if the safe runtime is loaded.
+//!
+bool hasSafeRuntime();
+
+//!
+//! \brief Create a safe runtime object if the dynamic library is loaded.
+//!
+nvinfer1::safe::IRuntime*
+createSafeInferRuntime(nvinfer1::ILogger& logger) noexcept;
+
+//!
+//! \brief Check if the consistency checker is loaded.
+//!
+bool hasConsistencyChecker();
+
+//!
+//! \brief Create a consistency checker object if the dynamic library is
+//! loaded.
+//!
+nvinfer1::consistency::IConsistencyChecker*
+createConsistencyChecker(nvinfer1::ILogger& logger,
+                         IHostMemory const* engine) noexcept;
+
+//!
+//! \brief Run a consistency check on a serialized engine.
+//!
+bool checkSafeEngine(void const* serializedEngine, int32_t const engineSize);
+}  // namespace sample
+
+#endif  // TRT_SAMPLE_ENGINES_H
diff --git a/fastdeploy/backends/tensorrt/common/sampleInference.cpp b/fastdeploy/backends/tensorrt/common/sampleInference.cpp
new file mode 100644
index 0000000000..fd7e9f82ff
--- /dev/null
+++ b/fastdeploy/backends/tensorrt/common/sampleInference.cpp
@@ -0,0 +1,943 @@
+/*
+ * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(__QNX__) +#include +#include +#endif + +#include "NvInfer.h" + +#include "ErrorRecorder.h" +#include "logger.h" +#include "sampleDevice.h" +#include "sampleEngines.h" +#include "sampleInference.h" +#include "sampleOptions.h" +#include "sampleReporting.h" +#include "sampleUtils.h" + +namespace sample { + +template +bool validateTensorNames(const MapType& map, const EngineType* engine, + const int32_t endBindingIndex) { + // Check if the provided input tensor names match the input tensors of the + // engine. + // Throw an error if the provided input tensor names cannot be found because + // it implies a potential typo. + for (const auto& item : map) { + bool tensorNameFound{false}; + for (int32_t b = 0; b < endBindingIndex; ++b) { + if (engine->bindingIsInput(b) && + engine->getBindingName(b) == item.first) { + tensorNameFound = true; + break; + } + } + if (!tensorNameFound) { + sample::gLogError + << "Cannot find input tensor with name \"" << item.first + << "\" in the engine bindings! " + << "Please make sure the input tensor names are correct." + << std::endl; + return false; + } + } + return true; +} + +template class FillBindingClosure { + private: + using InputsMap = std::unordered_map; + using BindingsVector = std::vector>; + + EngineType const* engine; + ContextType const* context; + InputsMap const& inputs; + BindingsVector& bindings; + int32_t batch; + int32_t endBindingIndex; + + void fillOneBinding(int32_t bindingIndex, int64_t vol) { + auto const dims = getDims(bindingIndex); + auto const name = engine->getBindingName(bindingIndex); + auto const isInput = engine->bindingIsInput(bindingIndex); + auto const dataType = engine->getBindingDataType(bindingIndex); + auto const* bindingInOutStr = isInput ? "input" : "output"; + for (auto& binding : bindings) { + const auto input = inputs.find(name); + if (isInput && input != inputs.end()) { + sample::gLogInfo << "Using values loaded from " << input->second + << " for input " << name << std::endl; + binding->addBinding(bindingIndex, name, isInput, vol, dataType, + input->second); + } else { + sample::gLogInfo << "Using random values for " << bindingInOutStr << " " + << name << std::endl; + binding->addBinding(bindingIndex, name, isInput, vol, dataType); + } + sample::gLogInfo << "Created " << bindingInOutStr << " binding for " + << name << " with dimensions " << dims << std::endl; + } + } + + bool fillAllBindings(int32_t batch, int32_t endBindingIndex) { + if (!validateTensorNames(inputs, engine, endBindingIndex)) { + sample::gLogError << "Invalid tensor names found in --loadInputs flag." 
+ << std::endl; + return false; + } + + for (int32_t b = 0; b < endBindingIndex; b++) { + auto const dims = getDims(b); + auto const comps = engine->getBindingComponentsPerElement(b); + auto const strides = context->getStrides(b); + int32_t const vectorDimIndex = engine->getBindingVectorizedDim(b); + auto const vol = volume(dims, strides, vectorDimIndex, comps, batch); + fillOneBinding(b, vol); + } + return true; + } + + Dims getDims(int32_t bindingIndex); + + public: + FillBindingClosure(EngineType const* _engine, ContextType const* _context, + InputsMap const& _inputs, BindingsVector& _bindings, + int32_t _batch, int32_t _endBindingIndex) + : engine(_engine), context(_context), inputs(_inputs), + bindings(_bindings), batch(_batch), endBindingIndex(_endBindingIndex) {} + + bool operator()() { return fillAllBindings(batch, endBindingIndex); } +}; + +template <> +Dims FillBindingClosure:: + getDims(int32_t bindingIndex) { + return context->getBindingDimensions(bindingIndex); +} + +template <> +Dims FillBindingClosure< + nvinfer1::safe::ICudaEngine, + nvinfer1::safe::IExecutionContext>::getDims(int32_t bindingIndex) { + return engine->getBindingDimensions(bindingIndex); +} + +bool setUpInference(InferenceEnvironment& iEnv, + const InferenceOptions& inference) { + int32_t device{}; + cudaCheck(cudaGetDevice(&device)); + + cudaDeviceProp properties; + cudaCheck(cudaGetDeviceProperties(&properties, device)); + // Use managed memory on integrated devices when transfers are skipped + // and when it is explicitly requested on the commandline. + bool useManagedMemory{(inference.skipTransfers && properties.integrated) || + inference.useManaged}; + using FillSafeBindings = + FillBindingClosure; + if (iEnv.safe) { + ASSERT(sample::hasSafeRuntime()); + auto* safeEngine = iEnv.safeEngine.get(); + for (int32_t s = 0; s < inference.streams; ++s) { + iEnv.safeContext.emplace_back(safeEngine->createExecutionContext()); + iEnv.bindings.emplace_back(new Bindings(useManagedMemory)); + } + const int32_t nBindings = safeEngine->getNbBindings(); + auto const* safeContext = iEnv.safeContext.front().get(); + // batch is set to 1 because safety only support explicit batch. + return FillSafeBindings(iEnv.safeEngine.get(), safeContext, + inference.inputs, iEnv.bindings, 1, nBindings)(); + } + + using FillStdBindings = + FillBindingClosure; + + for (int32_t s = 0; s < inference.streams; ++s) { + auto ec = iEnv.engine->createExecutionContext(); + if (ec == nullptr) { + sample::gLogError << "Unable to create execution context for stream " << s + << "." << std::endl; + return false; + } + iEnv.context.emplace_back(ec); + iEnv.bindings.emplace_back(new Bindings(useManagedMemory)); + } + if (iEnv.profiler) { + iEnv.context.front()->setProfiler(iEnv.profiler.get()); + // Always run reportToProfiler() after enqueue launch + iEnv.context.front()->setEnqueueEmitsProfile(false); + } + + const int32_t nOptProfiles = iEnv.engine->getNbOptimizationProfiles(); + const int32_t nBindings = iEnv.engine->getNbBindings(); + const int32_t bindingsInProfile = + nOptProfiles > 0 ? nBindings / nOptProfiles : 0; + const int32_t endBindingIndex = + bindingsInProfile ? bindingsInProfile : iEnv.engine->getNbBindings(); + + if (nOptProfiles > 1) { + sample::gLogWarning << "Multiple profiles are currently not supported. " + "Running with one profile." + << std::endl; + } + + // Make sure that the tensor names provided in command-line args actually + // exist in any of the engine bindings + // to avoid silent typos. 
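+  // (For instance, with hypothetical names: passing
+  // --shapes=input0:1x3x224x224 against an engine whose input is actually
+  // called "images" fails here with a clear error instead of silently
+  // benchmarking with default shapes.)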
+ if (!validateTensorNames(inference.shapes, iEnv.engine.get(), + endBindingIndex)) { + sample::gLogError << "Invalid tensor names found in --shapes flag." + << std::endl; + return false; + } + + // Set all input dimensions before all bindings can be allocated + for (int32_t b = 0; b < endBindingIndex; ++b) { + if (iEnv.engine->bindingIsInput(b)) { + auto dims = iEnv.context.front()->getBindingDimensions(b); + const bool isScalar = dims.nbDims == 0; + const bool isDynamicInput = + std::any_of(dims.d, dims.d + dims.nbDims, + [](int32_t dim) { return dim == -1; }) || + iEnv.engine->isShapeBinding(b); + if (isDynamicInput) { + auto shape = inference.shapes.find(iEnv.engine->getBindingName(b)); + + std::vector staticDims; + if (shape == inference.shapes.end()) { + // If no shape is provided, set dynamic dimensions to 1. + constexpr int32_t DEFAULT_DIMENSION = 1; + if (iEnv.engine->isShapeBinding(b)) { + if (isScalar) { + staticDims.push_back(1); + } else { + staticDims.resize(dims.d[0]); + std::fill(staticDims.begin(), staticDims.end(), + DEFAULT_DIMENSION); + } + } else { + staticDims.resize(dims.nbDims); + std::transform(dims.d, dims.d + dims.nbDims, staticDims.begin(), + [&](int32_t dimension) { + return dimension >= 0 ? dimension + : DEFAULT_DIMENSION; + }); + } + sample::gLogWarning << "Dynamic dimensions required for input: " + << iEnv.engine->getBindingName(b) + << ", but no shapes were provided. Automatically " + "overriding shape to: " + << staticDims << std::endl; + } else if (inference.inputs.count(shape->first) && + iEnv.engine->isShapeBinding(b)) { + if (isScalar || dims.nbDims == 1) { + // Load shape tensor from file. + size_t const size = isScalar ? 1 : dims.d[0]; + staticDims.resize(size); + auto const& filename = inference.inputs.at(shape->first); + auto dst = reinterpret_cast(staticDims.data()); + loadFromFile(filename, dst, + size * sizeof(decltype(staticDims)::value_type)); + } else { + sample::gLogWarning << "Cannot load shape tensor " << shape->first + << " from file, " + << "ND-Shape isn't supported yet" << std::endl; + // Fallback + staticDims = shape->second; + } + } else { + staticDims = shape->second; + } + + for (auto& c : iEnv.context) { + if (iEnv.engine->isShapeBinding(b)) { + if (!c->setInputShapeBinding(b, staticDims.data())) { + return false; + } + } else { + if (!c->setBindingDimensions(b, toDims(staticDims))) { + return false; + } + } + } + } + } + } + + auto* engine = iEnv.engine.get(); + auto const* context = iEnv.context.front().get(); + int32_t const batch = + engine->hasImplicitBatchDimension() ? inference.batch : 1; + return FillStdBindings(engine, context, inference.inputs, iEnv.bindings, + batch, endBindingIndex)(); +} + +namespace { + +#if defined(__QNX__) +using TimePoint = double; +#else +using TimePoint = std::chrono::time_point; +#endif + +TimePoint getCurrentTime() { +#if defined(__QNX__) + uint64_t const currentCycles = ClockCycles(); + uint64_t const cyclesPerSecond = SYSPAGE_ENTRY(qtime)->cycles_per_sec; + // Return current timestamp in ms. + return static_cast(currentCycles) * 1000. / cyclesPerSecond; +#else + return std::chrono::high_resolution_clock::now(); +#endif +} + +//! +//! \struct SyncStruct +//! \brief Threads synchronization structure +//! 
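+//! (mutex serializes access from the per-stream worker threads; gpuStart is
+//! a CUDA event recorded on mainStream and cpuStart the matching host
+//! timestamp, both used as the common origin for the latency traces below.)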
+//!
+//! \struct SyncStruct
+//! \brief Thread synchronization structure
+//!
+struct SyncStruct {
+  std::mutex mutex;
+  TrtCudaStream mainStream;
+  TrtCudaEvent gpuStart{cudaEventBlockingSync};
+  TimePoint cpuStart{};
+  float sleep{};
+};
+
+struct Enqueue {
+  explicit Enqueue(nvinfer1::IExecutionContext& context, void** buffers)
+      : mContext(context), mBuffers(buffers) {}
+
+  nvinfer1::IExecutionContext& mContext;
+  void** mBuffers{};
+};
+
+//!
+//! \class EnqueueImplicit
+//! \brief Functor to enqueue inference with implicit batch
+//!
+class EnqueueImplicit : private Enqueue {
+ public:
+  explicit EnqueueImplicit(nvinfer1::IExecutionContext& context, void** buffers,
+                           int32_t batch)
+      : Enqueue(context, buffers), mBatch(batch) {}
+
+  bool operator()(TrtCudaStream& stream) const {
+    if (mContext.enqueue(mBatch, mBuffers, stream.get(), nullptr)) {
+      // Collect layer timing info from the current profile index of the
+      // execution context.
+      if (mContext.getProfiler() && !mContext.getEnqueueEmitsProfile() &&
+          !mContext.reportToProfiler()) {
+        gLogWarning
+            << "Failed to collect layer timing info from previous enqueue()"
+            << std::endl;
+      }
+      return true;
+    }
+    return false;
+  }
+
+ private:
+  int32_t mBatch;
+};
+
+//!
+//! \class EnqueueExplicit
+//! \brief Functor to enqueue inference with explicit batch
+//!
+class EnqueueExplicit : private Enqueue {
+ public:
+  explicit EnqueueExplicit(nvinfer1::IExecutionContext& context, void** buffers)
+      : Enqueue(context, buffers) {}
+
+  bool operator()(TrtCudaStream& stream) const {
+    if (mContext.enqueueV2(mBuffers, stream.get(), nullptr)) {
+      // Collect layer timing info from the current profile index of the
+      // execution context.
+      if (mContext.getProfiler() && !mContext.getEnqueueEmitsProfile() &&
+          !mContext.reportToProfiler()) {
+        gLogWarning
+            << "Failed to collect layer timing info from previous enqueueV2()"
+            << std::endl;
+      }
+      return true;
+    }
+    return false;
+  }
+};
+
+//!
+//! \class EnqueueGraph
+//! \brief Functor to enqueue inference from a CUDA graph
+//!
+class EnqueueGraph {
+ public:
+  explicit EnqueueGraph(nvinfer1::IExecutionContext& context,
+                        TrtCudaGraph& graph)
+      : mGraph(graph), mContext(context) {}
+
+  bool operator()(TrtCudaStream& stream) const {
+    if (mGraph.launch(stream)) {
+      // Collect layer timing info from the current profile index of the
+      // execution context.
+      if (mContext.getProfiler() && !mContext.reportToProfiler()) {
+        gLogWarning << "Failed to collect layer timing info from previous CUDA "
+                       "graph launch"
+                    << std::endl;
+      }
+      return true;
+    }
+    return false;
+  }
+
+  TrtCudaGraph& mGraph;
+  nvinfer1::IExecutionContext& mContext;
+};
+
+//!
+//! \class EnqueueSafe
+//! \brief Functor to enqueue a safe execution context
+//!
+class EnqueueSafe {
+ public:
+  explicit EnqueueSafe(nvinfer1::safe::IExecutionContext& context,
+                       void** buffers)
+      : mContext(context), mBuffers(buffers) {}
+
+  bool operator()(TrtCudaStream& stream) const {
+    if (mContext.enqueueV2(mBuffers, stream.get(), nullptr)) {
+      return true;
+    }
+    return false;
+  }
+
+  nvinfer1::safe::IExecutionContext& mContext;
+  void** mBuffers{};
+};
+
+using EnqueueFunction = std::function<bool(TrtCudaStream&)>;
+
+enum class StreamType : int32_t {
+  kINPUT = 0,
+  kCOMPUTE = 1,
+  kOUTPUT = 2,
+  kNUM = 3
+};
+
+enum class EventType : int32_t {
+  kINPUT_S = 0,
+  kINPUT_E = 1,
+  kCOMPUTE_S = 2,
+  kCOMPUTE_E = 3,
+  kOUTPUT_S = 4,
+  kOUTPUT_E = 5,
+  kNUM = 6
+};
+
+using MultiStream =
+    std::array<TrtCudaStream, static_cast<int32_t>(StreamType::kNUM)>;
+
+using MultiEvent = std::array<std::unique_ptr<TrtCudaEvent>,
+                              static_cast<int32_t>(EventType::kNUM)>;
+
+using EnqueueTimes = std::array<TimePoint, 2>;
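+
+// Illustrative note: each *_S/*_E event pair brackets one phase of an
+// iteration on its stream: kINPUT_S/kINPUT_E time the host-to-device copy,
+// kCOMPUTE_S/kCOMPUTE_E the enqueued inference, and kOUTPUT_S/kOUTPUT_E the
+// device-to-host copy.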
+
+//!
+//! \class Iteration
+//! \brief Inference iteration and stream management
+//!
+template <class ContextType>
+class Iteration {
+ public:
+  Iteration(int32_t id, const InferenceOptions& inference, ContextType& context,
+            Bindings& bindings)
+      : mBindings(bindings), mStreamId(id), mDepth(1 + inference.overlap),
+        mActive(mDepth), mEvents(mDepth), mEnqueueTimes(mDepth),
+        mContext(&context) {
+    for (int32_t d = 0; d < mDepth; ++d) {
+      for (int32_t e = 0; e < static_cast<int32_t>(EventType::kNUM); ++e) {
+        mEvents[d][e].reset(new TrtCudaEvent(!inference.spin));
+      }
+    }
+    createEnqueueFunction(inference, context, bindings);
+  }
+
+  bool query(bool skipTransfers) {
+    if (mActive[mNext]) {
+      return true;
+    }
+
+    if (!skipTransfers) {
+      record(EventType::kINPUT_S, StreamType::kINPUT);
+      mBindings.transferInputToDevice(getStream(StreamType::kINPUT));
+      record(EventType::kINPUT_E, StreamType::kINPUT);
+      wait(EventType::kINPUT_E,
+           StreamType::kCOMPUTE);  // Wait for input DMA before compute
+    }
+
+    record(EventType::kCOMPUTE_S, StreamType::kCOMPUTE);
+    recordEnqueueTime();
+    if (!mEnqueue(getStream(StreamType::kCOMPUTE))) {
+      return false;
+    }
+    recordEnqueueTime();
+    record(EventType::kCOMPUTE_E, StreamType::kCOMPUTE);
+
+    if (!skipTransfers) {
+      wait(EventType::kCOMPUTE_E,
+           StreamType::kOUTPUT);  // Wait for compute before output DMA
+      record(EventType::kOUTPUT_S, StreamType::kOUTPUT);
+      mBindings.transferOutputToHost(getStream(StreamType::kOUTPUT));
+      record(EventType::kOUTPUT_E, StreamType::kOUTPUT);
+    }
+
+    mActive[mNext] = true;
+    moveNext();
+    return true;
+  }
+
+  float sync(const TimePoint& cpuStart, const TrtCudaEvent& gpuStart,
+             std::vector<InferenceTrace>& trace, bool skipTransfers) {
+    if (mActive[mNext]) {
+      if (skipTransfers) {
+        getEvent(EventType::kCOMPUTE_E).synchronize();
+      } else {
+        getEvent(EventType::kOUTPUT_E).synchronize();
+      }
+      trace.emplace_back(getTrace(cpuStart, gpuStart, skipTransfers));
+      mActive[mNext] = false;
+      return getEvent(EventType::kCOMPUTE_S) - gpuStart;
+    }
+    return 0;
+  }
+
+  void syncAll(const TimePoint& cpuStart, const TrtCudaEvent& gpuStart,
+               std::vector<InferenceTrace>& trace, bool skipTransfers) {
+    for (int32_t d = 0; d < mDepth; ++d) {
+      sync(cpuStart, gpuStart, trace, skipTransfers);
+      moveNext();
+    }
+  }
+
+  void wait(TrtCudaEvent& gpuStart) {
+    getStream(StreamType::kINPUT).wait(gpuStart);
+  }
+
+  void setInputData() {
+    mBindings.transferInputToDevice(getStream(StreamType::kINPUT));
+  }
+
+  void fetchOutputData() {
+    mBindings.transferOutputToHost(getStream(StreamType::kOUTPUT));
+  }
+
+ private:
+  void moveNext() { mNext = mDepth - 1 - mNext; }
+
+  TrtCudaStream& getStream(StreamType t) {
+    return mStream[static_cast<int32_t>(t)];
+  }
+
+  TrtCudaEvent& getEvent(EventType t) {
+    return *mEvents[mNext][static_cast<int32_t>(t)];
+  }
+
+  void record(EventType e, StreamType s) { getEvent(e).record(getStream(s)); }
+
+  void recordEnqueueTime() {
+    mEnqueueTimes[mNext][enqueueStart] = getCurrentTime();
+    enqueueStart = 1 - enqueueStart;
+  }
+
+  TimePoint getEnqueueTime(bool start) {
+    return mEnqueueTimes[mNext][start ? 0 : 1];
+  }
+
+  void wait(EventType e, StreamType s) { getStream(s).wait(getEvent(e)); }
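+
+  // Illustrative note: when --exposeDMA is not set, overlap is true, so
+  // mDepth is 2 and moveNext() alternates mNext between 0 and 1; iteration
+  // N+1 can then be enqueued while iteration N's transfers are still in
+  // flight.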
+
+  InferenceTrace getTrace(const TimePoint& cpuStart,
+                          const TrtCudaEvent& gpuStart, bool skipTransfers) {
+    float is = skipTransfers ? getEvent(EventType::kCOMPUTE_S) - gpuStart
+                             : getEvent(EventType::kINPUT_S) - gpuStart;
+    float ie = skipTransfers ? getEvent(EventType::kCOMPUTE_S) - gpuStart
+                             : getEvent(EventType::kINPUT_E) - gpuStart;
+    float os = skipTransfers ? getEvent(EventType::kCOMPUTE_E) - gpuStart
+                             : getEvent(EventType::kOUTPUT_S) - gpuStart;
+    float oe = skipTransfers ? getEvent(EventType::kCOMPUTE_E) - gpuStart
+                             : getEvent(EventType::kOUTPUT_E) - gpuStart;
+
+    return InferenceTrace(mStreamId,
+                          std::chrono::duration<float, std::milli>(
+                              getEnqueueTime(true) - cpuStart)
+                              .count(),
+                          std::chrono::duration<float, std::milli>(
+                              getEnqueueTime(false) - cpuStart)
+                              .count(),
+                          is, ie, getEvent(EventType::kCOMPUTE_S) - gpuStart,
+                          getEvent(EventType::kCOMPUTE_E) - gpuStart, os, oe);
+  }
+
+  void createEnqueueFunction(const InferenceOptions& inference,
+                             nvinfer1::IExecutionContext& context,
+                             Bindings& bindings) {
+    if (inference.batch) {
+      mEnqueue = EnqueueFunction(EnqueueImplicit(
+          context, mBindings.getDeviceBuffers(), inference.batch));
+    } else {
+      mEnqueue = EnqueueFunction(
+          EnqueueExplicit(context, mBindings.getDeviceBuffers()));
+    }
+    if (inference.graph) {
+      TrtCudaStream& stream = getStream(StreamType::kCOMPUTE);
+      // Avoid capturing initialization calls by executing the enqueue
+      // function at least once before starting CUDA graph capture.
+      const auto ret = mEnqueue(stream);
+      assert(ret);
+      stream.synchronize();
+
+      mGraph.beginCapture(stream);
+      // The built TRT engine may contain operations that are not permitted
+      // under CUDA graph capture mode. When the stream is capturing, the
+      // enqueue call may return false if the current CUDA graph capture
+      // fails.
+      if (mEnqueue(stream)) {
+        mGraph.endCapture(stream);
+        mEnqueue = EnqueueFunction(EnqueueGraph(context, mGraph));
+      } else {
+        mGraph.endCaptureOnError(stream);
+        // Ensure that any CUDA error has been cleaned up.
+        cudaCheck(cudaGetLastError());
+        sample::gLogWarning << "The built TensorRT engine contains operations "
+                               "that are not permitted under "
+                               "CUDA graph capture mode."
+                            << std::endl;
+        sample::gLogWarning << "The specified --useCudaGraph flag has been "
+                               "ignored. The inference will be "
+                               "launched without using CUDA graph launch."
+                            << std::endl;
+      }
+    }
+  }
+
+  void createEnqueueFunction(const InferenceOptions&,
+                             nvinfer1::safe::IExecutionContext& context,
+                             Bindings&) {
+    mEnqueue =
+        EnqueueFunction(EnqueueSafe(context, mBindings.getDeviceBuffers()));
+  }
+
+  Bindings& mBindings;
+
+  TrtCudaGraph mGraph;
+  EnqueueFunction mEnqueue;
+
+  int32_t mStreamId{0};
+  int32_t mNext{0};
+  int32_t mDepth{2};  // default to double buffering to hide DMA transfers
+
+  std::vector<bool> mActive;
+  MultiStream mStream;
+  std::vector<MultiEvent> mEvents;
+
+  int32_t enqueueStart{0};
+  std::vector<EnqueueTimes> mEnqueueTimes;
+  ContextType* mContext{nullptr};
+};
+
+template <class ContextType>
+bool inferenceLoop(
+    std::vector<std::unique_ptr<Iteration<ContextType>>>& iStreams,
+    const TimePoint& cpuStart, const TrtCudaEvent& gpuStart, int iterations,
+    float maxDurationMs, float warmupMs, std::vector<InferenceTrace>& trace,
+    bool skipTransfers, float idleMs) {
+  float durationMs = 0;
+  int32_t skip = 0;
+
+  for (int32_t i = 0; i < iterations + skip || durationMs < maxDurationMs;
+       ++i) {
+    for (auto& s : iStreams) {
+      if (!s->query(skipTransfers)) {
+        return false;
+      }
+    }
+    for (auto& s : iStreams) {
+      durationMs = std::max(durationMs,
+                            s->sync(cpuStart, gpuStart, trace, skipTransfers));
+    }
+    if (durationMs < warmupMs)  // Warming up
+    {
+      if (durationMs)  // Skip complete iterations
+      {
+        ++skip;
+      }
+      continue;
+    }
+    if (idleMs != 0.F) {
+      std::this_thread::sleep_for(
+          std::chrono::duration<float, std::milli>(idleMs));
+    }
+  }
+  for (auto& s : iStreams) {
+    s->syncAll(cpuStart, gpuStart, trace, skipTransfers);
+  }
+  return true;
+}
+
+template <class ContextType>
+void inferenceExecution(const InferenceOptions& inference,
+                        InferenceEnvironment& iEnv, SyncStruct& sync,
+                        const int32_t threadIdx, const int32_t streamsPerThread,
+                        int32_t device, std::vector<InferenceTrace>& trace) {
+  float warmupMs = inference.warmup;
+  float durationMs = inference.duration * 1000.F + warmupMs;
+
+  cudaCheck(cudaSetDevice(device));
+
+  std::vector<std::unique_ptr<Iteration<ContextType>>> iStreams;
+
+  for (int32_t s = 0; s < streamsPerThread; ++s) {
+    const int32_t streamId{threadIdx * streamsPerThread + s};
+    auto* iteration = new Iteration<ContextType>(
+        streamId, inference, *iEnv.template getContext<ContextType>(streamId),
+        *iEnv.bindings[streamId]);
+    if (inference.skipTransfers) {
+      iteration->setInputData();
+    }
+    iStreams.emplace_back(iteration);
+  }
+
+  for (auto& s : iStreams) {
+    s->wait(sync.gpuStart);
+  }
+
+  std::vector<InferenceTrace> localTrace;
+  if (!inferenceLoop(iStreams, sync.cpuStart, sync.gpuStart,
+                     inference.iterations, durationMs, warmupMs, localTrace,
+                     inference.skipTransfers, inference.idle)) {
+    iEnv.error = true;
+  }
+
+  if (inference.skipTransfers) {
+    for (auto& s : iStreams) {
+      s->fetchOutputData();
+    }
+  }
+
+  sync.mutex.lock();
+  trace.insert(trace.end(), localTrace.begin(), localTrace.end());
+  sync.mutex.unlock();
+}
+
+inline std::thread makeThread(const InferenceOptions& inference,
+                              InferenceEnvironment& iEnv, SyncStruct& sync,
+                              int32_t threadIdx, int32_t streamsPerThread,
+                              int32_t device,
+                              std::vector<InferenceTrace>& trace) {
+  if (iEnv.safe) {
+    ASSERT(sample::hasSafeRuntime());
+    return std::thread(inferenceExecution<nvinfer1::safe::IExecutionContext>,
+                       std::cref(inference), std::ref(iEnv), std::ref(sync),
+                       threadIdx, streamsPerThread, device, std::ref(trace));
+  }
+
+  return std::thread(inferenceExecution<nvinfer1::IExecutionContext>,
+                     std::cref(inference), std::ref(iEnv), std::ref(sync),
+                     threadIdx, streamsPerThread, device, std::ref(trace));
+}
+
+}  // namespace
+
+bool runInference(const InferenceOptions& inference, InferenceEnvironment& iEnv,
+                  int32_t device, std::vector<InferenceTrace>& trace) {
+  cudaCheck(cudaProfilerStart());
+
+  trace.resize(0);
+
+  SyncStruct sync;
+  sync.sleep = inference.sleep;
+  sync.mainStream.sleep(&sync.sleep);
+  sync.cpuStart = getCurrentTime();
+  sync.gpuStart.record(sync.mainStream);
+
+  // When multiple streams are used, trtexec can run inference in two modes:
+  // (1) if inference.threads is true, run each stream on its own thread;
+  // (2) if inference.threads is false, run all streams on the same thread.
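+  // Illustrative example: "--streams=4 --threads" runs four threads with one
+  // stream each, while "--streams=4" alone runs all four streams on a single
+  // thread.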
+  const int32_t numThreads = inference.threads ? inference.streams : 1;
+  const int32_t streamsPerThread = inference.threads ? 1 : inference.streams;
+
+  std::vector<std::thread> threads;
+  for (int32_t threadIdx = 0; threadIdx < numThreads; ++threadIdx) {
+    threads.emplace_back(makeThread(inference, iEnv, sync, threadIdx,
+                                    streamsPerThread, device, trace));
+  }
+  for (auto& th : threads) {
+    th.join();
+  }
+
+  cudaCheck(cudaProfilerStop());
+
+  auto cmpTrace = [](const InferenceTrace& a, const InferenceTrace& b) {
+    return a.h2dStart < b.h2dStart;
+  };
+  std::sort(trace.begin(), trace.end(), cmpTrace);
+
+  return !iEnv.error;
+}
+
+namespace {
+size_t reportGpuMemory() {
+  static size_t prevFree{0};
+  size_t free{0};
+  size_t total{0};
+  size_t newlyAllocated{0};
+  cudaCheck(cudaMemGetInfo(&free, &total));
+  sample::gLogInfo << "Free GPU memory = " << free / 1024.0_MiB << " GiB";
+  if (prevFree != 0) {
+    newlyAllocated = (prevFree - free);
+    sample::gLogInfo << ", newly allocated GPU memory = "
+                     << newlyAllocated / 1024.0_MiB << " GiB";
+  }
+  sample::gLogInfo << ", total GPU memory = " << total / 1024.0_MiB << " GiB"
+                   << std::endl;
+  prevFree = free;
+  return newlyAllocated;
+}
+}  // namespace
+
+//! Returns true if deserialization is slower than expected or fails.
+bool timeDeserialize(InferenceEnvironment& iEnv) {
+  constexpr int32_t kNB_ITERS{20};
+  std::unique_ptr<nvinfer1::IRuntime> rt{
+      createInferRuntime(sample::gLogger.getTRTLogger())};
+  std::unique_ptr<nvinfer1::ICudaEngine> engine;
+
+  std::unique_ptr<nvinfer1::safe::IRuntime> safeRT{
+      sample::createSafeInferRuntime(sample::gLogger.getTRTLogger())};
+  std::unique_ptr<nvinfer1::safe::ICudaEngine> safeEngine;
+
+  if (iEnv.safe) {
+    ASSERT(sample::hasSafeRuntime() && safeRT != nullptr);
+    safeRT->setErrorRecorder(&gRecorder);
+  }
+
+  auto timeDeserializeFn = [&]() -> float {
+    bool deserializeOK{false};
+    engine.reset(nullptr);
+    safeEngine.reset(nullptr);
+    auto startClock = std::chrono::high_resolution_clock::now();
+    if (iEnv.safe) {
+      safeEngine.reset(safeRT->deserializeCudaEngine(iEnv.engineBlob.data(),
+                                                     iEnv.engineBlob.size()));
+      deserializeOK = (safeEngine != nullptr);
+    } else {
+      engine.reset(rt->deserializeCudaEngine(iEnv.engineBlob.data(),
+                                             iEnv.engineBlob.size(), nullptr));
+      deserializeOK = (engine != nullptr);
+    }
+    auto endClock = std::chrono::high_resolution_clock::now();
+    // Return NAN if deserialization failed.
+    return deserializeOK
+               ? std::chrono::duration<float, std::milli>(endClock - startClock)
+                     .count()
+               : NAN;
+  };
+
+  // Warm up the caches to make sure that cache thrashing isn't throwing off
+  // the results.
+  {
+    sample::gLogInfo << "Begin deserialization warmup..." << std::endl;
+    for (int32_t i = 0, e = 2; i < e; ++i) {
+      timeDeserializeFn();
+    }
+  }
+  sample::gLogInfo << "Begin deserialization engine timing..." << std::endl;
+  float const first = timeDeserializeFn();
+
+  // Check if the first deserialization succeeded.
+  if (std::isnan(first)) {
+    sample::gLogError << "Engine deserialization failed." << std::endl;
+    return true;
+  }
+
+  sample::gLogInfo << "First deserialization time = " << first
+                   << " milliseconds" << std::endl;
+
+  // Record the initial GPU memory state.
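+  // Illustrative output (hypothetical values): each call logs a line such as
+  //   Free GPU memory = 10.5 GiB, newly allocated GPU memory = 0.4 GiB,
+  //   total GPU memory = 16 GiB
+  // and returns the number of bytes allocated since the previous call.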
+  reportGpuMemory();
+
+  float totalTime{0.F};
+  for (int32_t i = 0; i < kNB_ITERS; ++i) {
+    totalTime += timeDeserializeFn();
+  }
+  const auto averageTime = totalTime / kNB_ITERS;
+  // reportGpuMemory sometimes reports zero after a single deserialization of
+  // a small engine, so use the memory allocated over all the iterations.
+  const auto totalEngineSizeGpu = reportGpuMemory();
+  sample::gLogInfo << "Total deserialization time = " << totalTime
+                   << " milliseconds in " << kNB_ITERS
+                   << " iterations, average time = " << averageTime
+                   << " milliseconds, first time = " << first
+                   << " milliseconds." << std::endl;
+  sample::gLogInfo << "Deserialization Bandwidth = "
+                   << 1E-6 * totalEngineSizeGpu / totalTime << " GB/s"
+                   << std::endl;
+
+  // If the first deserialization is more than `tolerance` times slower than
+  // the average deserialization, return true, which means an error occurred.
+  // The tolerance is set to 2x since deserialization is quick and the first
+  // timing is susceptible to caching effects.
+  const auto tolerance = 2.0F;
+  const bool isSlowerThanExpected = first > averageTime * tolerance;
+  if (isSlowerThanExpected) {
+    sample::gLogInfo << "First deserialization time divided by average time is "
+                     << (first / averageTime) << ". Exceeds tolerance of "
+                     << tolerance << "x." << std::endl;
+  }
+  return isSlowerThanExpected;
+}
+
+std::string getLayerInformation(const InferenceEnvironment& iEnv,
+                                nvinfer1::LayerInformationFormat format) {
+  auto runtime = std::unique_ptr<nvinfer1::IRuntime>(
+      createInferRuntime(sample::gLogger.getTRTLogger()));
+  auto inspector = std::unique_ptr<nvinfer1::IEngineInspector>(
+      iEnv.engine->createEngineInspector());
+  if (!iEnv.context.empty()) {
+    inspector->setExecutionContext(iEnv.context.front().get());
+  }
+  std::string result = inspector->getEngineInformation(format);
+  return result;
+}
+
+}  // namespace sample
diff --git a/fastdeploy/backends/tensorrt/common/sampleInference.h b/fastdeploy/backends/tensorrt/common/sampleInference.h
new file mode 100644
index 0000000000..700dc8bef9
--- /dev/null
+++ b/fastdeploy/backends/tensorrt/common/sampleInference.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TRT_SAMPLE_INFERENCE_H
+#define TRT_SAMPLE_INFERENCE_H
+
+#include "sampleReporting.h"
+#include "sampleUtils.h"
+
+#include <iostream>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "NvInfer.h"
+#include "NvInferSafeRuntime.h"
+
+namespace sample {
+
+struct InferenceEnvironment {
+  TrtUniquePtr<nvinfer1::ICudaEngine> engine;
+  std::unique_ptr<Profiler> profiler;
+  std::vector<TrtUniquePtr<nvinfer1::IExecutionContext>> context;
+  std::vector<std::unique_ptr<Bindings>> bindings;
+  bool error{false};
+
+  std::vector<uint8_t> engineBlob;
+
+  bool safe{false};
+  std::unique_ptr<nvinfer1::safe::ICudaEngine> safeEngine;
+  std::vector<std::unique_ptr<nvinfer1::safe::IExecutionContext>> safeContext;
+
+  template <class ContextType>
+  inline ContextType* getContext(int32_t streamIdx);
+};
+
+template <>
+inline nvinfer1::IExecutionContext*
+InferenceEnvironment::getContext<nvinfer1::IExecutionContext>(
+    int32_t streamIdx) {
+  return context[streamIdx].get();
+}
+
+template <>
+inline nvinfer1::safe::IExecutionContext*
+InferenceEnvironment::getContext<nvinfer1::safe::IExecutionContext>(
+    int32_t streamIdx) {
+  return safeContext[streamIdx].get();
+}
+
+//!
+//! \brief Set up contexts and bindings for inference
+//!
+bool setUpInference(InferenceEnvironment& iEnv,
+                    const InferenceOptions& inference);
+
+//!
+//! \brief Deserialize the engine and time how long it takes.
+//!
+bool timeDeserialize(InferenceEnvironment& iEnv);
+
+//!
+//! \brief Run inference and collect timing; return false if any error is hit
+//! during inference
+//!
+bool runInference(const InferenceOptions& inference, InferenceEnvironment& iEnv,
+                  int32_t device, std::vector<InferenceTrace>& trace);
+
+//!
+//! \brief Get layer information of the engine.
+//!
+std::string getLayerInformation(const InferenceEnvironment& iEnv,
+                                nvinfer1::LayerInformationFormat format);
+
+}  // namespace sample
+
+#endif  // TRT_SAMPLE_INFERENCE_H
diff --git a/fastdeploy/backends/tensorrt/common/sampleOptions.cpp b/fastdeploy/backends/tensorrt/common/sampleOptions.cpp
new file mode 100644
index 0000000000..a01b4dfde8
--- /dev/null
+++ b/fastdeploy/backends/tensorrt/common/sampleOptions.cpp
@@ -0,0 +1,1634 @@
+/*
+ * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <algorithm>
+#include <cctype>
+#include <cstring>
+#include <iostream>
+#include <stdexcept>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "NvInfer.h"
+
+#include "logger.h"
+#include "sampleOptions.h"
+
+namespace sample {
+
+namespace {
+
+std::vector<std::string> splitToStringVec(const std::string& option,
+                                          char separator) {
+  std::vector<std::string> options;
+
+  for (size_t start = 0; start < option.length();) {
+    size_t separatorIndex = option.find(separator, start);
+    if (separatorIndex == std::string::npos) {
+      separatorIndex = option.length();
+    }
+    options.emplace_back(option.substr(start, separatorIndex - start));
+    start = separatorIndex + 1;
+  }
+
+  return options;
+}
+
+template <typename T> T stringToValue(const std::string& option) {
+  return T{option};
+}
+
+template <> int32_t stringToValue<int32_t>(const std::string& option) {
+  return std::stoi(option);
+}
+
+template <> float stringToValue<float>(const std::string& option) {
+  return std::stof(option);
+}
+
+template <> double stringToValue<double>(const std::string& option) {
+  return std::stod(option);
+}
+
+template <> bool stringToValue<bool>(const std::string& option) { return true; }
+
+template <>
+std::vector<int32_t>
+stringToValue<std::vector<int32_t>>(const std::string& option) {
+  std::vector<int32_t> shape;
+  std::vector<std::string> dimsStrings = splitToStringVec(option, 'x');
+  for (const auto& d : dimsStrings) {
+    shape.push_back(stringToValue<int32_t>(d));
+  }
+  return shape;
+}
+
+template <>
+nvinfer1::DataType
+stringToValue<nvinfer1::DataType>(const std::string& option) {
+  const std::unordered_map<std::string, nvinfer1::DataType> strToDT{
+      {"fp32", nvinfer1::DataType::kFLOAT},
+      {"fp16", nvinfer1::DataType::kHALF},
+      {"int8", nvinfer1::DataType::kINT8},
+      {"int32", nvinfer1::DataType::kINT32}};
+  const auto& dt = strToDT.find(option);
+  if (dt == strToDT.end()) {
+    throw std::invalid_argument("Invalid DataType " + option);
+  }
+  return dt->second;
+}
+
+template <>
+nvinfer1::TensorFormats
+stringToValue<nvinfer1::TensorFormats>(const std::string& option) {
+  std::vector<std::string> optionStrings = splitToStringVec(option, '+');
+  const std::unordered_map<std::string, nvinfer1::TensorFormat> strToFmt{
+      {"chw", nvinfer1::TensorFormat::kLINEAR},
+      {"chw2", nvinfer1::TensorFormat::kCHW2},
+      {"chw4", nvinfer1::TensorFormat::kCHW4},
+      {"hwc8", nvinfer1::TensorFormat::kHWC8},
+      {"chw16", nvinfer1::TensorFormat::kCHW16},
+      {"chw32", nvinfer1::TensorFormat::kCHW32},
+      {"dhwc8", nvinfer1::TensorFormat::kDHWC8},
+      {"hwc", nvinfer1::TensorFormat::kHWC},
+      {"dla_linear", nvinfer1::TensorFormat::kDLA_LINEAR},
+      {"dla_hwc4", nvinfer1::TensorFormat::kDLA_HWC4}};
+  nvinfer1::TensorFormats formats{};
+  for (auto f : optionStrings) {
+    const auto& tf = strToFmt.find(f);
+    if (tf == strToFmt.end()) {
+      throw std::invalid_argument(std::string("Invalid TensorFormat ") + f);
+    }
+    formats |= 1U << static_cast<int32_t>(tf->second);
+  }
+
+  return formats;
+}
+
+template <> IOFormat stringToValue<IOFormat>(const std::string& option) {
+  IOFormat ioFormat{};
+  const size_t colon = option.find(':');
+
+  if (colon == std::string::npos) {
+    throw std::invalid_argument(std::string("Invalid IOFormat ") + option);
+  }
+
+  ioFormat.first = stringToValue<nvinfer1::DataType>(option.substr(0, colon));
+  ioFormat.second =
+      stringToValue<nvinfer1::TensorFormats>(option.substr(colon + 1));
+
+  return ioFormat;
+}
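+
+// Illustrative example: the I/O-format string "fp16:chw16+chw" parses to the
+// pair (DataType::kHALF, bitmask of kCHW16 | kLINEAR): the datatype precedes
+// the first ':' and '+'-separated tensor-format names follow it.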
+
+template <typename T>
+std::pair<std::string, T> splitNameAndValue(const std::string& s) {
+  std::string tensorName;
+  std::string valueString;
+  // Split on the last ':'.
+  std::vector<std::string> nameRange{splitToStringVec(s, ':')};
+  // Everything before the last ':' is the name.
+  tensorName = nameRange[0];
+  for (size_t i = 1; i < nameRange.size() - 1; i++) {
+    tensorName += ":" + nameRange[i];
+  }
+  // The value is the string element after the last ':'.
+  valueString = nameRange[nameRange.size() - 1];
+  return std::pair<std::string, T>(tensorName, stringToValue<T>(valueString));
+}
+
+template <typename T>
+void splitInsertKeyValue(const std::vector<std::string>& kvList, T& map) {
+  for (const auto& kv : kvList) {
+    map.insert(splitNameAndValue<typename T::mapped_type>(kv));
+  }
+}
+
+const char* boolToEnabled(bool enable) {
+  return enable ? "Enabled" : "Disabled";
+}
+
+//! Check if the input option exists in the input arguments.
+//! If it does: parse its value into `value`, erase the argument, and return
+//! true. If it does not: return false.
+template <typename T>
+bool getAndDelOption(Arguments& arguments, const std::string& option,
+                     T& value) {
+  const auto match = arguments.find(option);
+  if (match != arguments.end()) {
+    value = stringToValue<T>(match->second);
+    arguments.erase(match);
+    return true;
+  }
+
+  return false;
+}
+
+//! Check if the input option exists in the input arguments.
+//! If it does: set `value` to false, erase the argument, and return true.
+//! If it does not: return false.
+bool getAndDelNegOption(Arguments& arguments, const std::string& option,
+                        bool& value) {
+  bool dummy;
+  if (getAndDelOption(arguments, option, dummy)) {
+    value = false;
+    return true;
+  }
+  return false;
+}
+
+//! Check if the input option exists in the input arguments.
+//! If it does: add all the matched argument values to the `values` vector,
+//! erase the arguments, and return true.
+//! If it does not: return false.
+template <typename T>
+bool getAndDelRepeatedOption(Arguments& arguments, const std::string& option,
+                             std::vector<T>& values) {
+  const auto match = arguments.equal_range(option);
+  if (match.first == match.second) {
+    return false;
+  }
+
+  auto addToValues = [&values](Arguments::value_type& argValue) {
+    values.emplace_back(stringToValue<T>(argValue.second));
+  };
+  std::for_each(match.first, match.second, addToValues);
+  arguments.erase(match.first, match.second);
+
+  return true;
+}
+
+void insertShapesBuild(std::unordered_map<std::string, ShapeRange>& shapes,
+                       nvinfer1::OptProfileSelector selector,
+                       const std::string& name,
+                       const std::vector<int32_t>& dims) {
+  shapes[name][static_cast<size_t>(selector)] = dims;
+}
+
+void insertShapesInference(
+    std::unordered_map<std::string, std::vector<int32_t>>& shapes,
+    const std::string& name, const std::vector<int32_t>& dims) {
+  shapes[name] = dims;
+}
+
+std::string removeSingleQuotationMarks(std::string& str) {
+  std::vector<std::string> strList{splitToStringVec(str, '\'')};
+  // Remove all the escaped single quotation marks.
+  std::string retVal = "";
+  // Do not really care about unterminated sequences.
+  for (size_t i = 0; i < strList.size(); i++) {
+    retVal += strList[i];
+  }
+  return retVal;
+}
+
+void getLayerPrecisions(Arguments& arguments, char const* argument,
+                        LayerPrecisions& layerPrecisions) {
+  std::string list;
+  if (!getAndDelOption(arguments, argument, list)) {
+    return;
+  }
+
+  // The layerPrecisions flag contains comma-separated layerName:precision
+  // pairs.
+  std::vector<std::string> precisionList{splitToStringVec(list, ',')};
+  for (auto const& s : precisionList) {
+    auto namePrecisionPair = splitNameAndValue<nvinfer1::DataType>(s);
+    auto const layerName = removeSingleQuotationMarks(namePrecisionPair.first);
+    layerPrecisions[layerName] = namePrecisionPair.second;
+  }
+}
+
+void getLayerOutputTypes(Arguments& arguments, char const* argument,
+                         LayerOutputTypes& layerOutputTypes) {
+  std::string list;
+  if (!getAndDelOption(arguments, argument, list)) {
+    return;
+  }
+
+  // The layerOutputTypes flag contains comma-separated layerName:types pairs.
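+  // Illustrative example (hypothetical layer names):
+  //   --layerOutputTypes=conv1:fp16,pool1:fp32+int8
+  // requests fp16 output for "conv1" and, for "pool1", fp32 and int8 for its
+  // two outputs; '+' separates one type per output.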
+  std::vector<std::string> precisionList{splitToStringVec(list, ',')};
+  for (auto const& s : precisionList) {
+    auto namePrecisionPair = splitNameAndValue<std::string>(s);
+    auto const layerName = removeSingleQuotationMarks(namePrecisionPair.first);
+    auto const typeStrings = splitToStringVec(namePrecisionPair.second, '+');
+    std::vector<nvinfer1::DataType> typeVec(typeStrings.size(),
+                                            nvinfer1::DataType::kFLOAT);
+    std::transform(typeStrings.begin(), typeStrings.end(), typeVec.begin(),
+                   stringToValue<nvinfer1::DataType>);
+    layerOutputTypes[layerName] = typeVec;
+  }
+}
+
+bool getShapesBuild(Arguments& arguments,
+                    std::unordered_map<std::string, ShapeRange>& shapes,
+                    char const* argument,
+                    nvinfer1::OptProfileSelector selector) {
+  std::string list;
+  bool retVal = getAndDelOption(arguments, argument, list);
+  std::vector<std::string> shapeList{splitToStringVec(list, ',')};
+  for (const auto& s : shapeList) {
+    auto nameDimsPair = splitNameAndValue<std::vector<int32_t>>(s);
+    auto tensorName = removeSingleQuotationMarks(nameDimsPair.first);
+    auto dims = nameDimsPair.second;
+    insertShapesBuild(shapes, selector, tensorName, dims);
+  }
+  return retVal;
+}
+
+bool getShapesInference(
+    Arguments& arguments,
+    std::unordered_map<std::string, std::vector<int32_t>>& shapes,
+    const char* argument) {
+  std::string list;
+  bool retVal = getAndDelOption(arguments, argument, list);
+  std::vector<std::string> shapeList{splitToStringVec(list, ',')};
+  for (const auto& s : shapeList) {
+    auto nameDimsPair = splitNameAndValue<std::vector<int32_t>>(s);
+    auto tensorName = removeSingleQuotationMarks(nameDimsPair.first);
+    auto dims = nameDimsPair.second;
+    insertShapesInference(shapes, tensorName, dims);
+  }
+  return retVal;
+}
+
+void processShapes(std::unordered_map<std::string, ShapeRange>& shapes,
+                   bool minShapes, bool optShapes, bool maxShapes,
+                   bool calib) {
+  // Accept either optShapes alone, or all three of minShapes, optShapes, and
+  // maxShapes.
+  if (((minShapes || maxShapes) && !optShapes)    // min and/or max without opt
+      || (minShapes && !maxShapes && optShapes)   // min and opt only
+      || (!minShapes && maxShapes && optShapes))  // max and opt only
+  {
+    if (calib) {
+      throw std::invalid_argument(
+          "Must specify only --optShapesCalib or all of --minShapesCalib, "
+          "--optShapesCalib, --maxShapesCalib");
+    } else {
+      throw std::invalid_argument(
+          "Must specify only --optShapes or all of --minShapes, --optShapes, "
+          "--maxShapes");
+    }
+  }
+
+  // If only optShapes are given, expand them to minShapes and maxShapes too.
+  if (optShapes && !minShapes && !maxShapes) {
+    std::unordered_map<std::string, ShapeRange> newShapes;
+    for (auto& s : shapes) {
+      insertShapesBuild(
+          newShapes, nvinfer1::OptProfileSelector::kMIN, s.first,
+          s.second[static_cast<size_t>(nvinfer1::OptProfileSelector::kOPT)]);
+      insertShapesBuild(
+          newShapes, nvinfer1::OptProfileSelector::kOPT, s.first,
+          s.second[static_cast<size_t>(nvinfer1::OptProfileSelector::kOPT)]);
+      insertShapesBuild(
+          newShapes, nvinfer1::OptProfileSelector::kMAX, s.first,
+          s.second[static_cast<size_t>(nvinfer1::OptProfileSelector::kOPT)]);
+    }
+    shapes = newShapes;
+  }
+}
+
+template <typename T>
+void printShapes(std::ostream& os, const char* phase, const T& shapes) {
+  if (shapes.empty()) {
+    os << "Input " << phase << " shapes: model" << std::endl;
+  } else {
+    for (const auto& s : shapes) {
+      os << "Input " << phase << " shape: " << s.first << "=" << s.second
+         << std::endl;
+    }
+  }
+}
+
+std::ostream& printBatch(std::ostream& os, int32_t maxBatch) {
+  if (maxBatch != maxBatchNotProvided) {
+    os << maxBatch;
+  } else {
+    os << "explicit batch";
+  }
+  return os;
+}
+
+std::ostream& printTacticSources(std::ostream& os,
+                                 nvinfer1::TacticSources enabledSources,
nvinfer1::TacticSources disabledSources) { + if (!enabledSources && !disabledSources) { + os << "Using default tactic sources"; + } else { + auto const addSource = [&](uint32_t source, std::string const& name) { + if (enabledSources & source) { + os << name << " [ON], "; + } else if (disabledSources & source) { + os << name << " [OFF], "; + } + }; + + addSource(1U << static_cast(nvinfer1::TacticSource::kCUBLAS), + "cublas"); + addSource(1U << static_cast(nvinfer1::TacticSource::kCUBLAS_LT), + "cublasLt"); + addSource(1U << static_cast(nvinfer1::TacticSource::kCUDNN), + "cudnn"); + } + return os; +} + +std::ostream& printPrecision(std::ostream& os, BuildOptions const& options) { + os << "FP32"; + if (options.fp16) { + os << "+FP16"; + } + if (options.int8) { + os << "+INT8"; + } + if (options.precisionConstraints == PrecisionConstraints::kOBEY) { + os << " (obey precision constraints)"; + } + if (options.precisionConstraints == PrecisionConstraints::kPREFER) { + os << " (prefer precision constraints)"; + } + return os; +} + +std::ostream& printTimingCache(std::ostream& os, BuildOptions const& options) { + switch (options.timingCacheMode) { + case TimingCacheMode::kGLOBAL: + os << "global"; + break; + case TimingCacheMode::kLOCAL: + os << "local"; + break; + case TimingCacheMode::kDISABLE: + os << "disable"; + break; + } + return os; +} + +std::ostream& printSparsity(std::ostream& os, BuildOptions const& options) { + switch (options.sparsity) { + case SparsityFlag::kDISABLE: + os << "Disabled"; + break; + case SparsityFlag::kENABLE: + os << "Enabled"; + break; + case SparsityFlag::kFORCE: + os << "Forced"; + break; + } + + return os; +} + +std::ostream& printMemoryPools(std::ostream& os, BuildOptions const& options) { + auto const printValueOrDefault = [&os](double const val) { + if (val >= 0) { + os << val << " MiB"; + } else { + os << "default"; + } + }; + os << "workspace: "; + printValueOrDefault(options.workspace); + os << ", "; + os << "dlaSRAM: "; + printValueOrDefault(options.dlaSRAM); + os << ", "; + os << "dlaLocalDRAM: "; + printValueOrDefault(options.dlaLocalDRAM); + os << ", "; + os << "dlaGlobalDRAM: "; + printValueOrDefault(options.dlaGlobalDRAM); + return os; +} + +} // namespace + +Arguments argsToArgumentsMap(int32_t argc, char* argv[]) { + Arguments arguments; + for (int32_t i = 1; i < argc; ++i) { + auto valuePtr = strchr(argv[i], '='); + if (valuePtr) { + std::string value{valuePtr + 1}; + arguments.emplace(std::string(argv[i], valuePtr - argv[i]), value); + } else { + arguments.emplace(argv[i], ""); + } + } + return arguments; +} + +void BaseModelOptions::parse(Arguments& arguments) { + if (getAndDelOption(arguments, "--onnx", model)) { + format = ModelFormat::kONNX; + } else if (getAndDelOption(arguments, "--uff", model)) { + format = ModelFormat::kUFF; + } else if (getAndDelOption(arguments, "--model", model)) { + format = ModelFormat::kCAFFE; + } +} + +void UffInput::parse(Arguments& arguments) { + getAndDelOption(arguments, "--uffNHWC", NHWC); + std::vector args; + if (getAndDelRepeatedOption(arguments, "--uffInput", args)) { + for (const auto& i : args) { + std::vector values{splitToStringVec(i, ',')}; + if (values.size() == 4) { + nvinfer1::Dims3 dims{std::stoi(values[1]), std::stoi(values[2]), + std::stoi(values[3])}; + inputs.emplace_back(values[0], dims); + } else { + throw std::invalid_argument(std::string("Invalid uffInput ") + i); + } + } + } +} + +void ModelOptions::parse(Arguments& arguments) { + baseModel.parse(arguments); + + switch (baseModel.format) { 
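+    // Illustrative pairings (hypothetical file names):
+    //   --model=net.caffemodel --deploy=net.prototxt  -> Caffe
+    //   --uff=net.uff --uffInput=in,3,224,224         -> UFF
+    //   --onnx=net.onnx                               -> ONNX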
+ case ModelFormat::kCAFFE: { + getAndDelOption(arguments, "--deploy", prototxt); + break; + } + case ModelFormat::kUFF: { + uffInputs.parse(arguments); + if (uffInputs.inputs.empty()) { + throw std::invalid_argument("Uff models require at least one input"); + } + break; + } + case ModelFormat::kONNX: + break; + case ModelFormat::kANY: { + if (getAndDelOption(arguments, "--deploy", prototxt)) { + baseModel.format = ModelFormat::kCAFFE; + } + break; + } + } + + // The --output flag should only be used with Caffe and UFF. It has no effect + // on ONNX. + std::vector outArgs; + if (getAndDelRepeatedOption(arguments, "--output", outArgs)) { + for (const auto& o : outArgs) { + for (auto& v : splitToStringVec(o, ',')) { + outputs.emplace_back(std::move(v)); + } + } + } + if (baseModel.format == ModelFormat::kCAFFE || + baseModel.format == ModelFormat::kUFF) { + if (outputs.empty()) { + throw std::invalid_argument( + "Caffe and Uff models require at least one output"); + } + } else if (baseModel.format == ModelFormat::kONNX) { + if (!outputs.empty()) { + throw std::invalid_argument( + "The --output flag should not be used with ONNX models."); + } + } +} + +void BuildOptions::parse(Arguments& arguments) { + auto getFormats = [&arguments](std::vector& formatsVector, + const char* argument) { + std::string list; + getAndDelOption(arguments, argument, list); + std::vector formats{splitToStringVec(list, ',')}; + for (const auto& f : formats) { + formatsVector.push_back(stringToValue(f)); + } + }; + + getFormats(inputFormats, "--inputIOFormats"); + getFormats(outputFormats, "--outputIOFormats"); + + bool addedExplicitBatchFlag{false}; + getAndDelOption(arguments, "--explicitBatch", addedExplicitBatchFlag); + if (addedExplicitBatchFlag) { + sample::gLogWarning + << "--explicitBatch flag has been deprecated and has no effect!" + << std::endl; + sample::gLogWarning << "Explicit batch dim is automatically enabled if " + "input model is ONNX or if dynamic " + << "shapes are provided when the engine is built." + << std::endl; + } + + bool minShapes = getShapesBuild(arguments, shapes, "--minShapes", + nvinfer1::OptProfileSelector::kMIN); + bool optShapes = getShapesBuild(arguments, shapes, "--optShapes", + nvinfer1::OptProfileSelector::kOPT); + bool maxShapes = getShapesBuild(arguments, shapes, "--maxShapes", + nvinfer1::OptProfileSelector::kMAX); + processShapes(shapes, minShapes, optShapes, maxShapes, false); + bool minShapesCalib = + getShapesBuild(arguments, shapesCalib, "--minShapesCalib", + nvinfer1::OptProfileSelector::kMIN); + bool optShapesCalib = + getShapesBuild(arguments, shapesCalib, "--optShapesCalib", + nvinfer1::OptProfileSelector::kOPT); + bool maxShapesCalib = + getShapesBuild(arguments, shapesCalib, "--maxShapesCalib", + nvinfer1::OptProfileSelector::kMAX); + processShapes(shapesCalib, minShapesCalib, optShapesCalib, maxShapesCalib, + true); + + bool addedExplicitPrecisionFlag{false}; + getAndDelOption(arguments, "--explicitPrecision", addedExplicitPrecisionFlag); + if (addedExplicitPrecisionFlag) { + sample::gLogWarning + << "--explicitPrecision flag has been deprecated and has no effect!" + << std::endl; + } + + if (getAndDelOption(arguments, "--workspace", workspace)) { + sample::gLogWarning + << "--workspace flag has been deprecated by --memPoolSize flag." 
+ << std::endl; + } + + std::string memPoolSizes; + getAndDelOption(arguments, "--memPoolSize", memPoolSizes); + std::vector memPoolSpecs{splitToStringVec(memPoolSizes, ',')}; + for (auto const& memPoolSpec : memPoolSpecs) { + std::string memPoolName; + double memPoolSize; + std::tie(memPoolName, memPoolSize) = splitNameAndValue(memPoolSpec); + if (memPoolSize < 0) { + throw std::invalid_argument(std::string("Negative memory pool size: ") + + std::to_string(memPoolSize)); + } + if (memPoolName == "workspace") { + workspace = memPoolSize; + } else if (memPoolName == "dlaSRAM") { + dlaSRAM = memPoolSize; + } else if (memPoolName == "dlaLocalDRAM") { + dlaLocalDRAM = memPoolSize; + } else if (memPoolName == "dlaGlobalDRAM") { + dlaGlobalDRAM = memPoolSize; + } else if (!memPoolName.empty()) { + throw std::invalid_argument(std::string("Unknown memory pool: ") + + memPoolName); + } + } + + getAndDelOption(arguments, "--maxBatch", maxBatch); + getAndDelOption(arguments, "--minTiming", minTiming); + getAndDelOption(arguments, "--avgTiming", avgTiming); + + bool best{false}; + getAndDelOption(arguments, "--best", best); + if (best) { + int8 = true; + fp16 = true; + } + + getAndDelOption(arguments, "--refit", refittable); + getAndDelNegOption(arguments, "--noTF32", tf32); + getAndDelOption(arguments, "--fp16", fp16); + getAndDelOption(arguments, "--int8", int8); + getAndDelOption(arguments, "--safe", safe); + getAndDelOption(arguments, "--consistency", consistency); + getAndDelOption(arguments, "--restricted", restricted); + + getAndDelOption(arguments, "--directIO", directIO); + + std::string precisionConstraintsString; + getAndDelOption(arguments, "--precisionConstraints", + precisionConstraintsString); + if (!precisionConstraintsString.empty()) { + const std::unordered_map + precisionConstraintsMap = {{"obey", PrecisionConstraints::kOBEY}, + {"prefer", PrecisionConstraints::kPREFER}, + {"none", PrecisionConstraints::kNONE}}; + auto it = precisionConstraintsMap.find(precisionConstraintsString); + if (it == precisionConstraintsMap.end()) { + throw std::invalid_argument( + std::string("Unknown precision constraints: ") + + precisionConstraintsString); + } + precisionConstraints = it->second; + } else { + precisionConstraints = PrecisionConstraints::kNONE; + } + + getLayerPrecisions(arguments, "--layerPrecisions", layerPrecisions); + getLayerOutputTypes(arguments, "--layerOutputTypes", layerOutputTypes); + + if (layerPrecisions.empty() && layerOutputTypes.empty() && + precisionConstraints != PrecisionConstraints::kNONE) { + sample::gLogWarning << "When --precisionConstraints flag is set to " + "\"obey\" or \"prefer\", please add " + << "--layerPrecision/--layerOutputTypes flags to set " + "layer-wise precisions and output " + << "types." << std::endl; + } else if ((!layerPrecisions.empty() || !layerOutputTypes.empty()) && + precisionConstraints == PrecisionConstraints::kNONE) { + sample::gLogWarning << "--layerPrecision/--layerOutputTypes flags have no " + "effect when --precisionConstraints " + << "flag is set to \"none\"." 
<< std::endl; + } + + std::string sparsityString; + getAndDelOption(arguments, "--sparsity", sparsityString); + if (sparsityString == "disable") { + sparsity = SparsityFlag::kDISABLE; + } else if (sparsityString == "enable") { + sparsity = SparsityFlag::kENABLE; + } else if (sparsityString == "force") { + sparsity = SparsityFlag::kFORCE; + } else if (!sparsityString.empty()) { + throw std::invalid_argument(std::string("Unknown sparsity mode: ") + + sparsityString); + } + + bool calibCheck = getAndDelOption(arguments, "--calib", calibration); + if (int8 && calibCheck && !shapes.empty() && shapesCalib.empty()) { + shapesCalib = shapes; + } + + std::string profilingVerbosityString; + if (getAndDelOption(arguments, "--nvtxMode", profilingVerbosityString)) { + sample::gLogWarning + << "--nvtxMode flag has been deprecated by --profilingVerbosity flag." + << std::endl; + } + + getAndDelOption(arguments, "--profilingVerbosity", profilingVerbosityString); + if (profilingVerbosityString == "layer_names_only") { + profilingVerbosity = nvinfer1::ProfilingVerbosity::kLAYER_NAMES_ONLY; + } else if (profilingVerbosityString == "none") { + profilingVerbosity = nvinfer1::ProfilingVerbosity::kNONE; + } else if (profilingVerbosityString == "detailed") { + profilingVerbosity = nvinfer1::ProfilingVerbosity::kDETAILED; + } else if (profilingVerbosityString == "default") { + sample::gLogWarning + << "--profilingVerbosity=default has been deprecated by " + "--profilingVerbosity=layer_names_only." + << std::endl; + profilingVerbosity = nvinfer1::ProfilingVerbosity::kLAYER_NAMES_ONLY; + } else if (profilingVerbosityString == "verbose") { + sample::gLogWarning << "--profilingVerbosity=verbose has been deprecated " + "by --profilingVerbosity=detailed." + << std::endl; + profilingVerbosity = nvinfer1::ProfilingVerbosity::kDETAILED; + } else if (!profilingVerbosityString.empty()) { + throw std::invalid_argument(std::string("Unknown profilingVerbosity: ") + + profilingVerbosityString); + } + + if (getAndDelOption(arguments, "--loadEngine", engine)) { + load = true; + } + if (getAndDelOption(arguments, "--saveEngine", engine)) { + save = true; + } + if (load && save) { + throw std::invalid_argument( + "Incompatible load and save engine options selected"); + } + + std::string tacticSourceArgs; + if (getAndDelOption(arguments, "--tacticSources", tacticSourceArgs)) { + std::vector tacticList = + splitToStringVec(tacticSourceArgs, ','); + for (auto& t : tacticList) { + bool enable{false}; + if (t.front() == '+') { + enable = true; + } else if (t.front() != '-') { + throw std::invalid_argument( + "Tactic source must be prefixed with + or -, indicating whether it " + "should be enabled or disabled " + "respectively."); + } + t.erase(0, 1); + + const auto toUpper = [](std::string& sourceName) { + std::transform(sourceName.begin(), sourceName.end(), sourceName.begin(), + [](char c) { return std::toupper(c); }); + return sourceName; + }; + + nvinfer1::TacticSource source{}; + t = toUpper(t); + if (t == "CUBLAS") { + source = nvinfer1::TacticSource::kCUBLAS; + } else if (t == "CUBLASLT" || t == "CUBLAS_LT") { + source = nvinfer1::TacticSource::kCUBLAS_LT; + } else if (t == "CUDNN") { + source = nvinfer1::TacticSource::kCUDNN; + } else { + throw std::invalid_argument(std::string("Unknown tactic source: ") + t); + } + + uint32_t sourceBit = 1U << static_cast(source); + + if (enable) { + enabledTactics |= sourceBit; + } else { + disabledTactics |= sourceBit; + } + + if (enabledTactics & disabledTactics) { + throw 
std::invalid_argument(std::string("Cannot enable and disable ") + + t); + } + } + } + + bool noBuilderCache{false}; + getAndDelOption(arguments, "--noBuilderCache", noBuilderCache); + getAndDelOption(arguments, "--timingCacheFile", timingCacheFile); + if (noBuilderCache) { + timingCacheMode = TimingCacheMode::kDISABLE; + } else if (!timingCacheFile.empty()) { + timingCacheMode = TimingCacheMode::kGLOBAL; + } else { + timingCacheMode = TimingCacheMode::kLOCAL; + } +} + +void SystemOptions::parse(Arguments& arguments) { + getAndDelOption(arguments, "--device", device); + getAndDelOption(arguments, "--useDLACore", DLACore); + getAndDelOption(arguments, "--allowGPUFallback", fallback); + std::string pluginName; + while (getAndDelOption(arguments, "--plugins", pluginName)) { + plugins.emplace_back(pluginName); + } +} + +void InferenceOptions::parse(Arguments& arguments) { + getAndDelOption(arguments, "--streams", streams); + getAndDelOption(arguments, "--iterations", iterations); + getAndDelOption(arguments, "--duration", duration); + getAndDelOption(arguments, "--warmUp", warmup); + getAndDelOption(arguments, "--sleepTime", sleep); + getAndDelOption(arguments, "--idleTime", idle); + bool exposeDMA{false}; + if (getAndDelOption(arguments, "--exposeDMA", exposeDMA)) { + overlap = !exposeDMA; + } + getAndDelOption(arguments, "--noDataTransfers", skipTransfers); + getAndDelOption(arguments, "--useManagedMemory", useManaged); + getAndDelOption(arguments, "--useSpinWait", spin); + getAndDelOption(arguments, "--threads", threads); + getAndDelOption(arguments, "--useCudaGraph", graph); + getAndDelOption(arguments, "--separateProfileRun", rerun); + getAndDelOption(arguments, "--buildOnly", skip); + getAndDelOption(arguments, "--timeDeserialize", timeDeserialize); + getAndDelOption(arguments, "--timeRefit", timeRefit); + + std::string list; + getAndDelOption(arguments, "--loadInputs", list); + std::vector inputsList{splitToStringVec(list, ',')}; + splitInsertKeyValue(inputsList, inputs); + + getShapesInference(arguments, shapes, "--shapes"); + getAndDelOption(arguments, "--batch", batch); +} + +void ReportingOptions::parse(Arguments& arguments) { + getAndDelOption(arguments, "--percentile", percentile); + getAndDelOption(arguments, "--avgRuns", avgs); + getAndDelOption(arguments, "--verbose", verbose); + getAndDelOption(arguments, "--dumpRefit", refit); + getAndDelOption(arguments, "--dumpOutput", output); + getAndDelOption(arguments, "--dumpProfile", profile); + getAndDelOption(arguments, "--dumpLayerInfo", layerInfo); + getAndDelOption(arguments, "--exportTimes", exportTimes); + getAndDelOption(arguments, "--exportOutput", exportOutput); + getAndDelOption(arguments, "--exportProfile", exportProfile); + getAndDelOption(arguments, "--exportLayerInfo", exportLayerInfo); + if (percentile < 0 || percentile > 100) { + throw std::invalid_argument(std::string("Percentile ") + + std::to_string(percentile) + + "is not in [0,100]"); + } +} + +bool parseHelp(Arguments& arguments) { + bool helpLong{false}; + bool helpShort{false}; + getAndDelOption(arguments, "--help", helpLong); + getAndDelOption(arguments, "-h", helpShort); + return helpLong || helpShort; +} + +void AllOptions::parse(Arguments& arguments) { + model.parse(arguments); + build.parse(arguments); + system.parse(arguments); + inference.parse(arguments); + + // Use explicitBatch when input model is ONNX or when dynamic shapes are used. 
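+  // Illustrative examples (hypothetical models): "--onnx=net.onnx", or any
+  // use of --shapes/--minShapes/--optShapes/--maxShapes, selects explicit
+  // batch below; "--uff=net.uff --batch=8" keeps the implicit-batch path.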
+ const bool isOnnx{model.baseModel.format == ModelFormat::kONNX}; + const bool hasDynamicShapes{!build.shapes.empty() || + !inference.shapes.empty()}; + const bool detectedExplicitBatch = isOnnx || hasDynamicShapes; + + // Throw an error if user tries to use --batch or --maxBatch when the engine + // has explicit batch dim. + const bool maxBatchWasSet{build.maxBatch != maxBatchNotProvided}; + const bool batchWasSet{inference.batch != batchNotProvided}; + if (detectedExplicitBatch && (maxBatchWasSet || batchWasSet)) { + throw std::invalid_argument( + "The --batch and --maxBatch flags should not be used when the input " + "model is ONNX or when dynamic shapes " + "are provided. Please use --optShapes and --shapes to set input shapes " + "instead."); + } + + // If batch and/or maxBatch is not set and the engine has implicit batch dim, + // set them to default values. + if (!detectedExplicitBatch) { + // If batch is not set, set it to default value. + if (!batchWasSet) { + inference.batch = defaultBatch; + } + // If maxBatch is not set, set it to be equal to batch. + if (!maxBatchWasSet) { + build.maxBatch = inference.batch; + } + // MaxBatch should not be less than batch. + if (build.maxBatch < inference.batch) { + throw std::invalid_argument( + "Build max batch " + std::to_string(build.maxBatch) + + " is less than inference batch " + std::to_string(inference.batch)); + } + } + + if (build.shapes.empty() && !inference.shapes.empty()) { + // If --shapes are provided but --optShapes are not, assume that optShapes + // is the same as shapes. + for (auto& s : inference.shapes) { + insertShapesBuild(build.shapes, nvinfer1::OptProfileSelector::kMIN, + s.first, s.second); + insertShapesBuild(build.shapes, nvinfer1::OptProfileSelector::kOPT, + s.first, s.second); + insertShapesBuild(build.shapes, nvinfer1::OptProfileSelector::kMAX, + s.first, s.second); + } + } else if (!build.shapes.empty() && inference.shapes.empty()) { + // If --optShapes are provided but --shapes are not, assume that shapes is + // the same as optShapes. + for (auto& s : build.shapes) { + insertShapesInference( + inference.shapes, s.first, + s.second[static_cast(nvinfer1::OptProfileSelector::kOPT)]); + } + } + + reporting.parse(arguments); + helps = parseHelp(arguments); + + if (!helps) { + if (!build.load && model.baseModel.format == ModelFormat::kANY) { + throw std::invalid_argument("Model missing or format not recognized"); + } + if (build.safe && system.DLACore >= 0) { + auto checkSafeDLAFormats = [](std::vector const& fmt) { + return fmt.empty() + ? 
false + : std::all_of(fmt.begin(), fmt.end(), + [](IOFormat const& pair) { + bool supported{false}; + bool const isLINEAR{ + pair.second == + 1U << static_cast( + nvinfer1::TensorFormat::kLINEAR)}; + bool const isCHW4{ + pair.second == + 1U << static_cast( + nvinfer1::TensorFormat::kCHW4)}; + bool const isCHW32{ + pair.second == + 1U << static_cast( + nvinfer1::TensorFormat::kCHW32)}; + bool const isCHW16{ + pair.second == + 1U << static_cast( + nvinfer1::TensorFormat::kCHW16)}; + supported |= pair.first == + nvinfer1::DataType::kINT8 && + (isLINEAR || isCHW4 || isCHW32); + supported |= pair.first == + nvinfer1::DataType::kHALF && + (isLINEAR || isCHW4 || isCHW16); + return supported; + }); + }; + if (!checkSafeDLAFormats(build.inputFormats) || + !checkSafeDLAFormats(build.outputFormats)) { + throw std::invalid_argument( + "I/O formats for safe DLA capability are restricted to " + "fp16/int8:linear, fp16:chw16 or int8:chw32"); + } + if (system.fallback) { + throw std::invalid_argument( + "GPU fallback (--allowGPUFallback) not allowed for safe DLA " + "capability"); + } + } + } +} + +void SafeBuilderOptions::parse(Arguments& arguments) { + auto getFormats = [&arguments](std::vector& formatsVector, + const char* argument) { + std::string list; + getAndDelOption(arguments, argument, list); + std::vector formats{splitToStringVec(list, ',')}; + for (const auto& f : formats) { + formatsVector.push_back(stringToValue(f)); + } + }; + + getAndDelOption(arguments, "--serialized", serialized); + getAndDelOption(arguments, "--onnx", onnxModelFile); + getAndDelOption(arguments, "--help", help); + getAndDelOption(arguments, "-h", help); + getAndDelOption(arguments, "--verbose", verbose); + getAndDelOption(arguments, "-v", verbose); + getFormats(inputFormats, "--inputIOFormats"); + getFormats(outputFormats, "--outputIOFormats"); + getAndDelOption(arguments, "--int8", int8); + getAndDelOption(arguments, "--calib", calibFile); + getAndDelOption(arguments, "--consistency", consistency); + getAndDelOption(arguments, "--std", standard); + std::string pluginName; + while (getAndDelOption(arguments, "--plugins", pluginName)) { + plugins.emplace_back(pluginName); + } +} + +std::ostream& operator<<(std::ostream& os, const BaseModelOptions& options) { + os << "=== Model Options ===" << std::endl; + + os << "Format: "; + switch (options.format) { + case ModelFormat::kCAFFE: { + os << "Caffe"; + break; + } + case ModelFormat::kONNX: { + os << "ONNX"; + break; + } + case ModelFormat::kUFF: { + os << "UFF"; + break; + } + case ModelFormat::kANY: + os << "*"; + break; + } + os << std::endl << "Model: " << options.model << std::endl; + + return os; +} + +std::ostream& operator<<(std::ostream& os, const UffInput& input) { + os << "Uff Inputs Layout: " << (input.NHWC ? 
"NHWC" : "NCHW") << std::endl; + for (const auto& i : input.inputs) { + os << "Input: " << i.first << "," << i.second.d[0] << "," << i.second.d[1] + << "," << i.second.d[2] << std::endl; + } + + return os; +} + +std::ostream& operator<<(std::ostream& os, const ModelOptions& options) { + os << options.baseModel; + switch (options.baseModel.format) { + case ModelFormat::kCAFFE: { + os << "Prototxt: " << options.prototxt << std::endl; + break; + } + case ModelFormat::kUFF: { + os << options.uffInputs; + break; + } + case ModelFormat::kONNX: // Fallthrough: No options to report for ONNX or + // the generic case + case ModelFormat::kANY: + break; + } + + os << "Output:"; + for (const auto& o : options.outputs) { + os << " " << o; + } + os << std::endl; + + return os; +} + +std::ostream& operator<<(std::ostream& os, nvinfer1::DataType dtype) { + switch (dtype) { + case nvinfer1::DataType::kFLOAT: { + os << "fp32"; + break; + } + case nvinfer1::DataType::kHALF: { + os << "fp16"; + break; + } + case nvinfer1::DataType::kINT8: { + os << "int8"; + break; + } + case nvinfer1::DataType::kINT32: { + os << "int32"; + break; + } + case nvinfer1::DataType::kBOOL: { + os << "bool"; + break; + } + } + return os; +} + +std::ostream& operator<<(std::ostream& os, IOFormat const& format) { + os << format.first << ":"; + + for (int32_t f = 0; f < nvinfer1::EnumMax(); ++f) { + if ((1U << f) & format.second) { + if (f) { + os << "+"; + } + switch (nvinfer1::TensorFormat(f)) { + case nvinfer1::TensorFormat::kLINEAR: { + os << "chw"; + break; + } + case nvinfer1::TensorFormat::kCHW2: { + os << "chw2"; + break; + } + case nvinfer1::TensorFormat::kHWC8: { + os << "hwc8"; + break; + } + case nvinfer1::TensorFormat::kHWC16: { + os << "hwc16"; + break; + } + case nvinfer1::TensorFormat::kCHW4: { + os << "chw4"; + break; + } + case nvinfer1::TensorFormat::kCHW16: { + os << "chw16"; + break; + } + case nvinfer1::TensorFormat::kCHW32: { + os << "chw32"; + break; + } + case nvinfer1::TensorFormat::kDHWC8: { + os << "dhwc8"; + break; + } + case nvinfer1::TensorFormat::kCDHW32: { + os << "cdhw32"; + break; + } + case nvinfer1::TensorFormat::kHWC: { + os << "hwc"; + break; + } + case nvinfer1::TensorFormat::kDLA_LINEAR: { + os << "dla_linear"; + break; + } + case nvinfer1::TensorFormat::kDLA_HWC4: { + os << "dla_hwc4"; + break; + } + } + } + } + return os; +} + +std::ostream& operator<<(std::ostream& os, const ShapeRange& dims) { + int32_t i = 0; + for (const auto& d : dims) { + if (!d.size()) { + break; + } + os << (i ? "+" : "") << d; + ++i; + } + return os; +} + +std::ostream& operator<<(std::ostream& os, + LayerPrecisions const& layerPrecisions) { + int32_t i = 0; + for (auto const& layerPrecision : layerPrecisions) { + os << (i ? "," : "") << layerPrecision.first << ":" + << layerPrecision.second; + ++i; + } + return os; +} + +std::ostream& operator<<(std::ostream& os, const BuildOptions& options) { + // clang-format off + os << "=== Build Options ===" << std::endl << + + "Max batch: "; printBatch(os, options.maxBatch) << std::endl << + "Memory Pools: "; printMemoryPools(os, options) << std::endl << + "minTiming: " << options.minTiming << std::endl << + "avgTiming: " << options.avgTiming << std::endl << + "Precision: "; printPrecision(os, options) << std::endl << + "LayerPrecisions: " << options.layerPrecisions << std::endl << + "Calibration: " << (options.int8 && options.calibration.empty() ? 
"Dynamic" : options.calibration.c_str()) << std::endl << + "Refit: " << boolToEnabled(options.refittable) << std::endl << + "Sparsity: "; printSparsity(os, options) << std::endl << + "Safe mode: " << boolToEnabled(options.safe) << std::endl << + "DirectIO mode: " << boolToEnabled(options.directIO) << std::endl << + "Restricted mode: " << boolToEnabled(options.restricted) << std::endl << + "Save engine: " << (options.save ? options.engine : "") << std::endl << + "Load engine: " << (options.load ? options.engine : "") << std::endl << + "Profiling verbosity: " << static_cast(options.profilingVerbosity) << std::endl << + "Tactic sources: "; printTacticSources(os, options.enabledTactics, options.disabledTactics) << std::endl << + "timingCacheMode: "; printTimingCache(os, options) << std::endl << + "timingCacheFile: " << options.timingCacheFile << std::endl; + // clang-format on + + auto printIOFormats = [](std::ostream& os, const char* direction, + const std::vector formats) { + if (formats.empty()) { + os << direction << "s format: fp32:CHW" << std::endl; + } else { + for (const auto& f : formats) { + os << direction << ": " << f << std::endl; + } + } + }; + + printIOFormats(os, "Input(s)", options.inputFormats); + printIOFormats(os, "Output(s)", options.outputFormats); + printShapes(os, "build", options.shapes); + printShapes(os, "calibration", options.shapesCalib); + + return os; +} + +std::ostream& operator<<(std::ostream& os, const SystemOptions& options) { + // clang-format off + os << "=== System Options ===" << std::endl << + + "Device: " << options.device << std::endl << + "DLACore: " << (options.DLACore != -1 ? std::to_string(options.DLACore) : "") << + (options.DLACore != -1 && options.fallback ? "(With GPU fallback)" : "") << std::endl; + os << "Plugins:"; + + for (const auto& p : options.plugins) + { + os << " " << p; + } + os << std::endl; + + return os; + // clang-format on +} + +std::ostream& operator<<(std::ostream& os, const InferenceOptions& options) { + // clang-format off + os << "=== Inference Options ===" << std::endl << + + "Batch: "; + if (options.batch && options.shapes.empty()) + { + os << options.batch << std::endl; + } + else + { + os << "Explicit" << std::endl; + } + printShapes(os, "inference", options.shapes); + os << "Iterations: " << options.iterations << std::endl << + "Duration: " << options.duration << "s (+ " + << options.warmup << "ms warm up)" << std::endl << + "Sleep time: " << options.sleep << "ms" << std::endl << + "Idle time: " << options.idle << "ms" << std::endl << + "Streams: " << options.streams << std::endl << + "ExposeDMA: " << boolToEnabled(!options.overlap) << std::endl << + "Data transfers: " << boolToEnabled(!options.skipTransfers) << std::endl << + "Spin-wait: " << boolToEnabled(options.spin) << std::endl << + "Multithreading: " << boolToEnabled(options.threads) << std::endl << + "CUDA Graph: " << boolToEnabled(options.graph) << std::endl << + "Separate profiling: " << boolToEnabled(options.rerun) << std::endl << + "Time Deserialize: " << boolToEnabled(options.timeDeserialize) << std::endl << + "Time Refit: " << boolToEnabled(options.timeRefit) << std::endl << + "Skip inference: " << boolToEnabled(options.skip) << std::endl; + + // clang-format on + os << "Inputs:" << std::endl; + for (const auto& input : options.inputs) { + os << input.first << "<-" << input.second << std::endl; + } + + return os; +} + +std::ostream& operator<<(std::ostream& os, const ReportingOptions& options) { + // clang-format off + os << "=== Reporting Options ===" << 
std::endl << + + "Verbose: " << boolToEnabled(options.verbose) << std::endl << + "Averages: " << options.avgs << " inferences" << std::endl << + "Percentile: " << options.percentile << std::endl << + "Dump refittable layers:" << boolToEnabled(options.refit) << std::endl << + "Dump output: " << boolToEnabled(options.output) << std::endl << + "Profile: " << boolToEnabled(options.profile) << std::endl << + "Export timing to JSON file: " << options.exportTimes << std::endl << + "Export output to JSON file: " << options.exportOutput << std::endl << + "Export profile to JSON file: " << options.exportProfile << std::endl; + // clang-format on + + return os; +} + +std::ostream& operator<<(std::ostream& os, const AllOptions& options) { + os << options.model << options.build << options.system << options.inference + << options.reporting << std::endl; + return os; +} + +std::ostream& operator<<(std::ostream& os, const SafeBuilderOptions& options) { + auto printIOFormats = [](std::ostream& os, const char* direction, + const std::vector formats) { + if (formats.empty()) { + os << direction << "s format: fp32:CHW" << std::endl; + } else { + for (const auto& f : formats) { + os << direction << ": " << f << std::endl; + } + } + }; + + os << "=== Build Options ===" << std::endl; + os << "Model ONNX: " << options.onnxModelFile << std::endl; + + os << "Precision: FP16"; + if (options.int8) { + os << " + INT8"; + } + os << std::endl; + os << "Calibration file: " << options.calibFile << std::endl; + os << "Serialized Network: " << options.serialized << std::endl; + + printIOFormats(os, "Input(s)", options.inputFormats); + printIOFormats(os, "Output(s)", options.outputFormats); + + os << "Plugins:"; + for (const auto& p : options.plugins) { + os << " " << p; + } + os << std::endl; + return os; +} + +void BaseModelOptions::help(std::ostream& os) { + // clang-format off + os << " --uff= UFF model" << std::endl << + " --onnx= ONNX model" << std::endl << + " --model= Caffe model (default = no model, random weights used)" << std::endl; + // clang-format on +} + +void UffInput::help(std::ostream& os) { + // clang-format off + os << " --uffInput=,X,Y,Z Input blob name and its dimensions (X,Y,Z=C,H,W), it can be specified " + "multiple times; at least one is required for UFF models" << std::endl << + " --uffNHWC Set if inputs are in the NHWC layout instead of NCHW (use " << + "X,Y,Z=H,W,C order in --uffInput)" << std::endl; + // clang-format on +} + +void ModelOptions::help(std::ostream& os) { + // clang-format off + os << "=== Model Options ===" << std::endl; + BaseModelOptions::help(os); + os << " --deploy= Caffe prototxt file" << std::endl << + " --output=[,]* Output names (it can be specified multiple times); at least one output " + "is required for UFF and Caffe" << std::endl; + UffInput::help(os); + // clang-format on +} + +void BuildOptions::help(std::ostream& os) { + // clang-format off + os << "=== Build Options ===" "\n" + " --maxBatch Set max batch size and build an implicit batch engine (default = same size as --batch)" "\n" + " This option should not be used when the input model is ONNX or when dynamic shapes are provided." 
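+  // A hypothetical invocation using the shape options documented below
+  // (example names and values only; the grammar is the one specified here):
+  //   --minShapes=input0:1x3x224x224 --optShapes=input0:8x3x224x224
+  //   --maxShapes=input0:32x3x224x224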
"\n" + " --minShapes=spec Build with dynamic shapes using a profile with the min shapes provided" "\n" + " --optShapes=spec Build with dynamic shapes using a profile with the opt shapes provided" "\n" + " --maxShapes=spec Build with dynamic shapes using a profile with the max shapes provided" "\n" + " --minShapesCalib=spec Calibrate with dynamic shapes using a profile with the min shapes provided" "\n" + " --optShapesCalib=spec Calibrate with dynamic shapes using a profile with the opt shapes provided" "\n" + " --maxShapesCalib=spec Calibrate with dynamic shapes using a profile with the max shapes provided" "\n" + " Note: All three of min, opt and max shapes must be supplied." "\n" + " However, if only opt shapes is supplied then it will be expanded so" "\n" + " that min shapes and max shapes are set to the same values as opt shapes." "\n" + " Input names can be wrapped with escaped single quotes (ex: \\\'Input:0\\\')." "\n" + " Example input shapes spec: input0:1x3x256x256,input1:1x3x128x128" "\n" + " Each input shape is supplied as a key-value pair where key is the input name and" "\n" + " value is the dimensions (including the batch dimension) to be used for that input." "\n" + " Each key-value pair has the key and value separated using a colon (:)." "\n" + " Multiple input shapes can be provided via comma-separated key-value pairs." "\n" + " --inputIOFormats=spec Type and format of each of the input tensors (default = all inputs in fp32:chw)" "\n" + " See --outputIOFormats help for the grammar of type and format list." "\n" + " Note: If this option is specified, please set comma-separated types and formats for all" "\n" + " inputs following the same order as network inputs ID (even if only one input" "\n" + " needs specifying IO format) or set the type and format once for broadcasting." "\n" + " --outputIOFormats=spec Type and format of each of the output tensors (default = all outputs in fp32:chw)" "\n" + " Note: If this option is specified, please set comma-separated types and formats for all" "\n" + " outputs following the same order as network outputs ID (even if only one output" "\n" + " needs specifying IO format) or set the type and format once for broadcasting." "\n" + " IO Formats: spec ::= IOfmt[\",\"spec]" "\n" + " IOfmt ::= type:fmt" "\n" + " type ::= \"fp32\"|\"fp16\"|\"int32\"|\"int8\"" "\n" + " fmt ::= (\"chw\"|\"chw2\"|\"chw4\"|\"hwc8\"|\"chw16\"|\"chw32\"|\"dhwc8\")[\"+\"fmt]" "\n" + " --workspace=N Set workspace size in MiB." "\n" + " --memPoolSize=poolspec Specify the size constraints of the designated memory pool(s) in MiB." "\n" + " Note: Also accepts decimal sizes, e.g. 0.25MiB. Will be rounded down to the nearest integer bytes." "\n" + " Pool constraint: poolspec ::= poolfmt[\",\"poolspec]" "\n" + " poolfmt ::= pool:sizeInMiB" "\n" + " pool ::= \"workspace\"|\"dlaSRAM\"|\"dlaLocalDRAM\"|\"dlaGlobalDRAM\"" "\n" + " --profilingVerbosity=mode Specify profiling verbosity. mode ::= layer_names_only|detailed|none (default = layer_names_only)" "\n" + " --minTiming=M Set the minimum number of iterations used in kernel selection (default = " + << defaultMinTiming << ")" "\n" + " --avgTiming=M Set the number of times averaged in each iteration for kernel selection (default = " + << defaultAvgTiming << ")" "\n" + " --refit Mark the engine as refittable. This will allow the inspection of refittable layers " "\n" + " and weights within the engine." "\n" + " --sparsity=spec Control sparsity (default = disabled). 
" "\n" + " Sparsity: spec ::= \"disable\", \"enable\", \"force\"" "\n" + " Note: Description about each of these options is as below" "\n" + " disable = do not enable sparse tactics in the builder (this is the default)" "\n" + " enable = enable sparse tactics in the builder (but these tactics will only be" "\n" + " considered if the weights have the right sparsity pattern)" "\n" + " force = enable sparse tactics in the builder and force-overwrite the weights to have" "\n" + " a sparsity pattern (even if you loaded a model yourself)" "\n" + " --noTF32 Disable tf32 precision (default is to enable tf32, in addition to fp32)" "\n" + " --fp16 Enable fp16 precision, in addition to fp32 (default = disabled)" "\n" + " --int8 Enable int8 precision, in addition to fp32 (default = disabled)" "\n" + " --best Enable all precisions to achieve the best performance (default = disabled)" "\n" + " --directIO Avoid reformatting at network boundaries. (default = disabled)" "\n" + " --precisionConstraints=spec Control precision constraint setting. (default = none)" "\n" + " Precision Constaints: spec ::= \"none\" | \"obey\" | \"prefer\"" "\n" + " none = no constraints" "\n" + " prefer = meet precision constraints set by --layerPrecisions/--layerOutputTypes if possible" "\n" + " obey = meet precision constraints set by --layerPrecisions/--layerOutputTypes or fail" "\n" + " otherwise" "\n" + " --layerPrecisions=spec Control per-layer precision constraints. Effective only when precisionConstraints is set to" "\n" + " \"obey\" or \"prefer\". (default = none)" "\n" + " The specs are read left-to-right, and later ones override earlier ones. \"*\" can be used as a" "\n" + " layerName to specify the default precision for all the unspecified layers." "\n" + " Per-layer precision spec ::= layerPrecision[\",\"spec]" "\n" + " layerPrecision ::= layerName\":\"precision" "\n" + " precision ::= \"fp32\"|\"fp16\"|\"int32\"|\"int8\"" "\n" + " --layerOutputTypes=spec Control per-layer output type constraints. Effective only when precisionConstraints is set to" "\n" + " \"obey\" or \"prefer\". (default = none)" "\n" + " The specs are read left-to-right, and later ones override earlier ones. \"*\" can be used as a" "\n" + " layerName to specify the default precision for all the unspecified layers. If a layer has more than""\n" + " one output, then multiple types separated by \"+\" can be provided for this layer." "\n" + " Per-layer output type spec ::= layerOutputTypes[\",\"spec]" "\n" + " layerOutputTypes ::= layerName\":\"type" "\n" + " type ::= \"fp32\"|\"fp16\"|\"int32\"|\"int8\"[\"+\"type]" "\n" + " --calib= Read INT8 calibration cache file" "\n" + " --safe Enable build safety certified engine" "\n" + " --consistency Perform consistency checking on safety certified engine" "\n" + " --restricted Enable safety scope checking with kSAFETY_SCOPE build flag" "\n" + " --saveEngine= Save the serialized engine" "\n" + " --loadEngine= Load a serialized engine" "\n" + " --tacticSources=tactics Specify the tactics to be used by adding (+) or removing (-) tactics from the default " "\n" + " tactic sources (default = all available tactics)." "\n" + " Note: Currently only cuDNN, cuBLAS and cuBLAS-LT are listed as optional tactics." 
"\n" + " Tactic Sources: tactics ::= [\",\"tactic]" "\n" + " tactic ::= (+|-)lib" "\n" + " lib ::= \"CUBLAS\"|\"CUBLAS_LT\"|\"CUDNN\"" "\n" + " For example, to disable cudnn and enable cublas: --tacticSources=-CUDNN,+CUBLAS" "\n" + " --noBuilderCache Disable timing cache in builder (default is to enable timing cache)" "\n" + " --timingCacheFile= Save/load the serialized global timing cache" "\n" + ; + // clang-format on + os << std::flush; +} + +void SystemOptions::help(std::ostream& os) { + // clang-format off + os << "=== System Options ===" << std::endl << + " --device=N Select cuda device N (default = " << defaultDevice << ")" << std::endl << + " --useDLACore=N Select DLA core N for layers that support DLA (default = none)" << std::endl << + " --allowGPUFallback When DLA is enabled, allow GPU fallback for unsupported layers " + "(default = disabled)" << std::endl; + os << " --plugins Plugin library (.so) to load (can be specified multiple times)" << std::endl; + // clang-format on +} + +void InferenceOptions::help(std::ostream& os) { + // clang-format off + os << "=== Inference Options ===" << std::endl << + " --batch=N Set batch size for implicit batch engines (default = " << defaultBatch << ")" << std::endl << + " This option should not be used when the engine is built from an ONNX model or when dynamic" << std::endl << + " shapes are provided when the engine is built." << std::endl << + " --shapes=spec Set input shapes for dynamic shapes inference inputs." << std::endl << + " Note: Input names can be wrapped with escaped single quotes (ex: \\\'Input:0\\\')." << std::endl << + " Example input shapes spec: input0:1x3x256x256, input1:1x3x128x128" << std::endl << + " Each input shape is supplied as a key-value pair where key is the input name and" << std::endl << + " value is the dimensions (including the batch dimension) to be used for that input." << std::endl << + " Each key-value pair has the key and value separated using a colon (:)." << std::endl << + " Multiple input shapes can be provided via comma-separated key-value pairs." << std::endl << + " --loadInputs=spec Load input values from files (default = generate random inputs). Input names can be " + "wrapped with single quotes (ex: 'Input:0')" << std::endl << + " Input values spec ::= Ival[\",\"spec]" << std::endl << + " Ival ::= name\":\"file" << std::endl << + " --iterations=N Run at least N inference iterations (default = " << defaultIterations << ")" << std::endl << + " --warmUp=N Run for N milliseconds to warmup before measuring performance (default = " + << defaultWarmUp << ")" << std::endl << + " --duration=N Run performance measurements for at least N seconds wallclock time (default = " + << defaultDuration << ")" << std::endl << + " --sleepTime=N Delay inference start with a gap of N milliseconds between launch and compute " + "(default = " << defaultSleep << ")" << std::endl << + " --idleTime=N Sleep N milliseconds between two continuous iterations" + "(default = " << defaultIdle << ")" << std::endl << + " --streams=N Instantiate N engines to use concurrently (default = " << defaultStreams << ")" << std::endl << + " --exposeDMA Serialize DMA transfers to and from device (default = disabled)." << std::endl << + " --noDataTransfers Disable DMA transfers to and from device (default = enabled)." << std::endl << + " --useManagedMemory Use managed memory instead of seperate host and device allocations (default = disabled)." << std::endl << + " --useSpinWait Actively synchronize on GPU events. 
This option may decrease synchronization time but " + "increase CPU usage and power (default = disabled)" << std::endl << + " --threads Enable multithreading to drive engines with independent threads" + " or speed up refitting (default = disabled) " << std::endl << + " --useCudaGraph Use CUDA graph to capture engine execution and then launch inference (default = disabled)." << std::endl << + " This flag may be ignored if the graph capture fails." << std::endl << + " --timeDeserialize Time the amount of time it takes to deserialize the network and exit." << std::endl << + " --timeRefit Time the amount of time it takes to refit the engine before inference." << std::endl << + " --separateProfileRun Do not attach the profiler in the benchmark run; if profiling is enabled, a second " + "profile run will be executed (default = disabled)" << std::endl << + " --buildOnly Skip inference perf measurement (default = disabled)" << std::endl; + // clang-format on +} + +void ReportingOptions::help(std::ostream& os) { + // clang-format off + os << "=== Reporting Options ===" << std::endl << + " --verbose Use verbose logging (default = false)" << std::endl << + " --avgRuns=N Report performance measurements averaged over N consecutive " + "iterations (default = " << defaultAvgRuns << ")" << std::endl << + " --percentile=P Report performance for the P percentage (0<=P<=100, 0 " + "representing max perf, and 100 representing min perf; (default" + " = " << defaultPercentile << "%)" << std::endl << + " --dumpRefit Print the refittable layers and weights from a refittable " + "engine" << std::endl << + " --dumpOutput Print the output tensor(s) of the last inference iteration " + "(default = disabled)" << std::endl << + " --dumpProfile Print profile information per layer (default = disabled)" << std::endl << + " --dumpLayerInfo Print layer information of the engine to console " + "(default = disabled)" << std::endl << + " --exportTimes= Write the timing results in a json file (default = disabled)" << std::endl << + " --exportOutput= Write the output tensors to a json file (default = disabled)" << std::endl << + " --exportProfile= Write the profile information per layer in a json file " + "(default = disabled)" << std::endl << + " --exportLayerInfo= Write the layer information of the engine in a json file " + "(default = disabled)" << std::endl; + // clang-format on +} + +void helpHelp(std::ostream& os) { + // clang-format off + os << "=== Help ===" << std::endl << + " --help, -h Print this message" << std::endl; + // clang-format on +} + +void AllOptions::help(std::ostream& os) { + ModelOptions::help(os); + os << std::endl; + BuildOptions::help(os); + os << std::endl; + InferenceOptions::help(os); + os << std::endl; + // clang-format off + os << "=== Build and Inference Batch Options ===" << std::endl << + " When using implicit batch, the max batch size of the engine, if not given, " << std::endl << + " is set to the inference batch size;" << std::endl << + " when using explicit batch, if shapes are specified only for inference, they " << std::endl << + " will be used also as min/opt/max in the build profile; if shapes are " << std::endl << + " specified only for the build, the opt shapes will be used also for inference;" << std::endl << + " if both are specified, they must be compatible; and if explicit batch is " << std::endl << + " enabled but neither is specified, the model must provide complete static" << std::endl << + " dimensions, including batch size, for all inputs" << std::endl << + " Using ONNX 
models automatically forces explicit batch." << std::endl << + std::endl; + // clang-format on + ReportingOptions::help(os); + os << std::endl; + SystemOptions::help(os); + os << std::endl; + helpHelp(os); +} + +void SafeBuilderOptions::printHelp(std::ostream& os) { + // clang-format off + os << "=== Mandatory ===" << std::endl << + " --onnx= ONNX model" << std::endl << + " " << std::endl << + "=== Optional ===" << std::endl << + " --inputIOFormats=spec Type and format of each of the input tensors (default = all inputs in fp32:chw)" << std::endl << + " See --outputIOFormats help for the grammar of type and format list." << std::endl << + " Note: If this option is specified, please set comma-separated types and formats for all" << std::endl << + " inputs following the same order as network inputs ID (even if only one input" << std::endl << + " needs specifying IO format) or set the type and format once for broadcasting." << std::endl << + " --outputIOFormats=spec Type and format of each of the output tensors (default = all outputs in fp32:chw)" << std::endl << + " Note: If this option is specified, please set comma-separated types and formats for all" << std::endl << + " outputs following the same order as network outputs ID (even if only one output" << std::endl << + " needs specifying IO format) or set the type and format once for broadcasting." << std::endl << + " IO Formats: spec ::= IOfmt[\",\"spec]" << std::endl << + " IOfmt ::= type:fmt" << std::endl << + " type ::= \"fp32\"|\"fp16\"|\"int32\"|\"int8\"" << std::endl << + " fmt ::= (\"chw\"|\"chw2\"|\"chw4\"|\"hwc8\"|\"chw16\"|\"chw32\"|\"dhwc8\")[\"+\"fmt]" << std::endl << + " --int8 Enable int8 precision, in addition to fp16 (default = disabled)" << std::endl << + " --consistency Enable consistency check for serialized engine, (default = disabled)" << std::endl << + " --std Build standard serialized engine, (default = disabled)" << std::endl << + " --calib= Read INT8 calibration cache file" << std::endl << + " --serialized= Save the serialized network" << std::endl << + " --plugins Plugin library (.so) to load (can be specified multiple times)" << std::endl << + " --verbose or -v Use verbose logging (default = false)" << std::endl << + " --help or -h Print this message" << std::endl << + " " << std::endl; + // clang-format on +} + +} // namespace sample diff --git a/fastdeploy/backends/tensorrt/common/sampleOptions.h b/fastdeploy/backends/tensorrt/common/sampleOptions.h new file mode 100644 index 0000000000..99293da10a --- /dev/null +++ b/fastdeploy/backends/tensorrt/common/sampleOptions.h @@ -0,0 +1,311 @@ +/* + * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#ifndef TRT_SAMPLE_OPTIONS_H
+#define TRT_SAMPLE_OPTIONS_H
+
+#include <algorithm>
+#include <array>
+#include <iostream>
+#include <stdexcept>
+#include <string>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "NvInfer.h"
+
+namespace sample {
+
+// Build default params
+constexpr int32_t maxBatchNotProvided{0};
+constexpr int32_t defaultMinTiming{1};
+constexpr int32_t defaultAvgTiming{8};
+
+// System default params
+constexpr int32_t defaultDevice{0};
+
+// Inference default params
+constexpr int32_t defaultBatch{1};
+constexpr int32_t batchNotProvided{0};
+constexpr int32_t defaultStreams{1};
+constexpr int32_t defaultIterations{10};
+constexpr float defaultWarmUp{200.F};
+constexpr float defaultDuration{3.F};
+constexpr float defaultSleep{};
+constexpr float defaultIdle{};
+
+// Reporting default params
+constexpr int32_t defaultAvgRuns{10};
+constexpr float defaultPercentile{99};
+
+enum class PrecisionConstraints { kNONE, kOBEY, kPREFER };
+
+enum class ModelFormat { kANY, kCAFFE, kONNX, kUFF };
+
+enum class SparsityFlag { kDISABLE, kENABLE, kFORCE };
+
+enum class TimingCacheMode { kDISABLE, kLOCAL, kGLOBAL };
+
+using Arguments = std::unordered_multimap<std::string, std::string>;
+
+using IOFormat = std::pair<nvinfer1::DataType, nvinfer1::TensorFormats>;
+
+using ShapeRange =
+    std::array<std::vector<int32_t>,
+               nvinfer1::EnumMax<nvinfer1::OptProfileSelector>()>;
+
+using LayerPrecisions = std::unordered_map<std::string, nvinfer1::DataType>;
+using LayerOutputTypes =
+    std::unordered_map<std::string, std::vector<nvinfer1::DataType>>;
+
+struct Options {
+  virtual void parse(Arguments& arguments) = 0;
+};
+
+struct BaseModelOptions : public Options {
+  ModelFormat format{ModelFormat::kANY};
+  std::string model;
+
+  void parse(Arguments& arguments) override;
+
+  static void help(std::ostream& out);
+};
+
+struct UffInput : public Options {
+  std::vector<std::pair<std::string, nvinfer1::Dims>> inputs;
+  bool NHWC{false};
+
+  void parse(Arguments& arguments) override;
+
+  static void help(std::ostream& out);
+};
+
+struct ModelOptions : public Options {
+  BaseModelOptions baseModel;
+  std::string prototxt;
+  std::vector<std::string> outputs;
+  UffInput uffInputs;
+
+  void parse(Arguments& arguments) override;
+
+  static void help(std::ostream& out);
+};
+
+struct BuildOptions : public Options {
+  int32_t maxBatch{maxBatchNotProvided};
+  double workspace{-1.0};
+  double dlaSRAM{-1.0};
+  double dlaLocalDRAM{-1.0};
+  double dlaGlobalDRAM{-1.0};
+  int32_t minTiming{defaultMinTiming};
+  int32_t avgTiming{defaultAvgTiming};
+  bool tf32{true};
+  bool fp16{false};
+  bool int8{false};
+  bool directIO{false};
+  PrecisionConstraints precisionConstraints{PrecisionConstraints::kNONE};
+  LayerPrecisions layerPrecisions;
+  LayerOutputTypes layerOutputTypes;
+  bool safe{false};
+  bool consistency{false};
+  bool restricted{false};
+  bool save{false};
+  bool load{false};
+  bool refittable{false};
+  SparsityFlag sparsity{SparsityFlag::kDISABLE};
+  nvinfer1::ProfilingVerbosity profilingVerbosity{
+      nvinfer1::ProfilingVerbosity::kLAYER_NAMES_ONLY};
+  std::string engine;
+  std::string calibration;
+  std::unordered_map<std::string, ShapeRange> shapes;
+  std::unordered_map<std::string, ShapeRange> shapesCalib;
+  std::vector<IOFormat> inputFormats;
+  std::vector<IOFormat> outputFormats;
+  nvinfer1::TacticSources enabledTactics{0};
+  nvinfer1::TacticSources disabledTactics{0};
+  TimingCacheMode timingCacheMode{TimingCacheMode::kLOCAL};
+  std::string timingCacheFile{};
+  void parse(Arguments& arguments) override;
+
+  static void help(std::ostream& out);
+};
+
+struct SystemOptions : public Options {
+  int32_t device{defaultDevice};
+  int32_t DLACore{-1};
+  bool fallback{false};
+  std::vector<std::string> plugins;
+
+  void parse(Arguments& arguments) override;
+
+  static void help(std::ostream& out);
+};
+
+struct InferenceOptions : public Options {
+  int32_t batch{batchNotProvided};
+
int32_t iterations{defaultIterations}; + int32_t streams{defaultStreams}; + float warmup{defaultWarmUp}; + float duration{defaultDuration}; + float sleep{defaultSleep}; + float idle{defaultIdle}; + bool overlap{true}; + bool skipTransfers{false}; + bool useManaged{false}; + bool spin{false}; + bool threads{false}; + bool graph{false}; + bool skip{false}; + bool rerun{false}; + bool timeDeserialize{false}; + bool timeRefit{false}; + std::unordered_map inputs; + std::unordered_map> shapes; + + void parse(Arguments& arguments) override; + + static void help(std::ostream& out); +}; + +struct ReportingOptions : public Options { + bool verbose{false}; + int32_t avgs{defaultAvgRuns}; + float percentile{defaultPercentile}; + bool refit{false}; + bool output{false}; + bool profile{false}; + bool layerInfo{false}; + std::string exportTimes; + std::string exportOutput; + std::string exportProfile; + std::string exportLayerInfo; + + void parse(Arguments& arguments) override; + + static void help(std::ostream& out); +}; + +struct SafeBuilderOptions : public Options { + std::string serialized{}; + std::string onnxModelFile{}; + bool help{false}; + bool verbose{false}; + std::vector inputFormats; + std::vector outputFormats; + bool int8{false}; + std::string calibFile{}; + std::vector plugins; + bool consistency{false}; + bool standard{false}; + + void parse(Arguments& arguments) override; + + static void printHelp(std::ostream& out); +}; + +struct AllOptions : public Options { + ModelOptions model; + BuildOptions build; + SystemOptions system; + InferenceOptions inference; + ReportingOptions reporting; + bool helps{false}; + + void parse(Arguments& arguments) override; + + static void help(std::ostream& out); +}; + +Arguments argsToArgumentsMap(int32_t argc, char* argv[]); + +bool parseHelp(Arguments& arguments); + +void helpHelp(std::ostream& out); + +// Functions to print options + +std::ostream& operator<<(std::ostream& os, const BaseModelOptions& options); + +std::ostream& operator<<(std::ostream& os, const UffInput& input); + +std::ostream& operator<<(std::ostream& os, const IOFormat& format); + +std::ostream& operator<<(std::ostream& os, const ShapeRange& dims); + +std::ostream& operator<<(std::ostream& os, const ModelOptions& options); + +std::ostream& operator<<(std::ostream& os, const BuildOptions& options); + +std::ostream& operator<<(std::ostream& os, const SystemOptions& options); + +std::ostream& operator<<(std::ostream& os, const InferenceOptions& options); + +std::ostream& operator<<(std::ostream& os, const ReportingOptions& options); + +std::ostream& operator<<(std::ostream& os, const AllOptions& options); + +std::ostream& operator<<(std::ostream& os, const SafeBuilderOptions& options); + +inline std::ostream& operator<<(std::ostream& os, const nvinfer1::Dims& dims) { + for (int32_t i = 0; i < dims.nbDims; ++i) { + os << (i ? 
"x" : "") << dims.d[i]; + } + return os; +} +inline std::ostream& operator<<(std::ostream& os, + const nvinfer1::WeightsRole role) { + switch (role) { + case nvinfer1::WeightsRole::kKERNEL: { + os << "Kernel"; + break; + } + case nvinfer1::WeightsRole::kBIAS: { + os << "Bias"; + break; + } + case nvinfer1::WeightsRole::kSHIFT: { + os << "Shift"; + break; + } + case nvinfer1::WeightsRole::kSCALE: { + os << "Scale"; + break; + } + case nvinfer1::WeightsRole::kCONSTANT: { + os << "Constant"; + break; + } + case nvinfer1::WeightsRole::kANY: { + os << "Any"; + break; + } + } + + return os; +} + +inline std::ostream& operator<<(std::ostream& os, + const std::vector& vec) { + for (int32_t i = 0, e = static_cast(vec.size()); i < e; ++i) { + os << (i ? "x" : "") << vec[i]; + } + return os; +} + +} // namespace sample + +#endif // TRT_SAMPLES_OPTIONS_H diff --git a/fastdeploy/backends/tensorrt/common/sampleReporting.cpp b/fastdeploy/backends/tensorrt/common/sampleReporting.cpp new file mode 100644 index 0000000000..5e8e8619be --- /dev/null +++ b/fastdeploy/backends/tensorrt/common/sampleReporting.cpp @@ -0,0 +1,480 @@ +/* + * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "sampleInference.h" +#include "sampleOptions.h" +#include "sampleReporting.h" + +using namespace nvinfer1; + +namespace sample { + +namespace { + +//! +//! \brief Find percentile in an ascending sequence of timings +//! \note percentile must be in [0, 100]. Otherwise, an exception is thrown. +//! +template +float findPercentile(float percentile, + std::vector const& timings, + T const& toFloat) { + int32_t const all = static_cast(timings.size()); + int32_t const exclude = static_cast((1 - percentile / 100) * all); + if (timings.empty()) { + return std::numeric_limits::infinity(); + } + if (percentile < 0.0f || percentile > 100.0f) { + throw std::runtime_error("percentile is not in [0, 100]!"); + } + return toFloat(timings[std::max(all - 1 - exclude, 0)]); +} + +//! +//! \brief Find median in a sorted sequence of timings +//! +template +float findMedian(std::vector const& timings, T const& toFloat) { + if (timings.empty()) { + return std::numeric_limits::infinity(); + } + + int32_t const m = timings.size() / 2; + if (timings.size() % 2) { + return toFloat(timings[m]); + } + + return (toFloat(timings[m - 1]) + toFloat(timings[m])) / 2; +} + +//! +//! \brief Find coefficient of variance (which is std / mean) in a sorted +//! sequence of timings given the mean +//! 
+template +float findCoeffOfVariance(std::vector const& timings, + T const& toFloat, float mean) { + if (timings.empty()) { + return 0; + } + + if (mean == 0.F) { + return std::numeric_limits::infinity(); + } + + auto const metricAccumulator = [toFloat, mean](float acc, + InferenceTime const& a) { + float const diff = toFloat(a) - mean; + return acc + diff * diff; + }; + float const variance = + std::accumulate(timings.begin(), timings.end(), 0.F, metricAccumulator) / + timings.size(); + + return std::sqrt(variance) / mean * 100.F; +} + +inline InferenceTime traceToTiming(const InferenceTrace& a) { + return InferenceTime((a.enqEnd - a.enqStart), (a.h2dEnd - a.h2dStart), + (a.computeEnd - a.computeStart), (a.d2hEnd - a.d2hStart), + (a.d2hEnd - a.h2dStart)); +} + +} // namespace + +void printProlog(int32_t warmups, int32_t timings, float warmupMs, + float benchTimeMs, std::ostream& os) { + os << "Warmup completed " << warmups << " queries over " << warmupMs << " ms" + << std::endl; + os << "Timing trace has " << timings << " queries over " << benchTimeMs / 1000 + << " s" << std::endl; +} + +void printTiming(std::vector const& timings, int32_t runsPerAvg, + std::ostream& os) { + int32_t count = 0; + InferenceTime sum; + + os << std::endl; + os << "=== Trace details ===" << std::endl; + os << "Trace averages of " << runsPerAvg << " runs:" << std::endl; + for (auto const& t : timings) { + sum += t; + + if (++count == runsPerAvg) { + // clang-format off + os << "Average on " << runsPerAvg << " runs - GPU latency: " << sum.compute / runsPerAvg + << " ms - Host latency: " << sum.latency() / runsPerAvg << " ms (end to end " << sum.e2e / runsPerAvg + << " ms, enqueue " << sum.enq / runsPerAvg << " ms)" << std::endl; + // clang-format on + count = 0; + sum.enq = 0; + sum.h2d = 0; + sum.compute = 0; + sum.d2h = 0; + sum.e2e = 0; + } + } +} + +void printMetricExplanations(std::ostream& os) { + os << std::endl; + os << "=== Explanations of the performance metrics ===" << std::endl; + os << "Total Host Walltime: the host walltime from when the first query " + "(after warmups) is enqueued to when the " + "last query is completed." + << std::endl; + os << "GPU Compute Time: the GPU latency to execute the kernels for a query." + << std::endl; + os << "Total GPU Compute Time: the summation of the GPU Compute Time of all " + "the queries. If this is significantly " + "shorter than Total Host Walltime, the GPU may be under-utilized " + "because of host-side overheads or data " + "transfers." + << std::endl; + os << "Throughput: the observed throughput computed by dividing the number " + "of queries by the Total Host Walltime. " + "If this is significantly lower than the reciprocal of GPU Compute " + "Time, the GPU may be under-utilized " + "because of host-side overheads or data transfers." + << std::endl; + os << "Enqueue Time: the host latency to enqueue a query. If this is longer " + "than GPU Compute Time, the GPU may be " + "under-utilized." + << std::endl; + os << "H2D Latency: the latency for host-to-device data transfers for input " + "tensors of a single query." + << std::endl; + os << "D2H Latency: the latency for device-to-host data transfers for output " + "tensors of a single query." + << std::endl; + os << "Latency: the summation of H2D Latency, GPU Compute Time, and D2H " + "Latency. This is the latency to infer a " + "single query." 
+ << std::endl; + os << "End-to-End Host Latency: the duration from when the H2D of a query is " + "called to when the D2H of the same " + "query is completed, which includes the latency to wait for the " + "completion of the previous query. This is " + "the latency of a query if multiple queries are enqueued consecutively." + << std::endl; +} + +PerformanceResult +getPerformanceResult(std::vector const& timings, + std::function metricGetter, + float percentile) { + auto const metricComparator = [metricGetter](InferenceTime const& a, + InferenceTime const& b) { + return metricGetter(a) < metricGetter(b); + }; + auto const metricAccumulator = [metricGetter](float acc, + InferenceTime const& a) { + return acc + metricGetter(a); + }; + std::vector newTimings = timings; + std::sort(newTimings.begin(), newTimings.end(), metricComparator); + PerformanceResult result; + result.min = metricGetter(newTimings.front()); + result.max = metricGetter(newTimings.back()); + result.mean = std::accumulate(newTimings.begin(), newTimings.end(), 0.0f, + metricAccumulator) / + newTimings.size(); + result.median = findMedian(newTimings, metricGetter); + result.percentile = findPercentile(percentile, newTimings, metricGetter); + result.coeffVar = findCoeffOfVariance(newTimings, metricGetter, result.mean); + return result; +} + +void printEpilog(std::vector const& timings, float walltimeMs, + float percentile, int32_t batchSize, std::ostream& osInfo, + std::ostream& osWarning, std::ostream& osVerbose) { + float const throughput = batchSize * timings.size() / walltimeMs * 1000; + + auto const getLatency = [](InferenceTime const& t) { return t.latency(); }; + auto const latencyResult = + getPerformanceResult(timings, getLatency, percentile); + + auto const getEndToEnd = [](InferenceTime const& t) { return t.e2e; }; + auto const e2eLatencyResult = + getPerformanceResult(timings, getEndToEnd, percentile); + + auto const getEnqueue = [](InferenceTime const& t) { return t.enq; }; + auto const enqueueResult = + getPerformanceResult(timings, getEnqueue, percentile); + + auto const getH2d = [](InferenceTime const& t) { return t.h2d; }; + auto const h2dResult = getPerformanceResult(timings, getH2d, percentile); + + auto const getCompute = [](InferenceTime const& t) { return t.compute; }; + auto const gpuComputeResult = + getPerformanceResult(timings, getCompute, percentile); + + auto const getD2h = [](InferenceTime const& t) { return t.d2h; }; + auto const d2hResult = getPerformanceResult(timings, getD2h, percentile); + + auto const toPerfString = [percentile](const PerformanceResult& r) { + std::stringstream s; + s << "min = " << r.min << " ms, max = " << r.max << " ms, mean = " << r.mean + << " ms, " + << "median = " << r.median << " ms, percentile(" << percentile + << "%) = " << r.percentile << " ms"; + return s.str(); + }; + + osInfo << std::endl; + osInfo << "=== Performance summary ===" << std::endl; + osInfo << "Throughput: " << throughput << " qps" << std::endl; + osInfo << "Latency: " << toPerfString(latencyResult) << std::endl; + osInfo << "End-to-End Host Latency: " << toPerfString(e2eLatencyResult) + << std::endl; + osInfo << "Enqueue Time: " << toPerfString(enqueueResult) << std::endl; + osInfo << "H2D Latency: " << toPerfString(h2dResult) << std::endl; + osInfo << "GPU Compute Time: " << toPerfString(gpuComputeResult) << std::endl; + osInfo << "D2H Latency: " << toPerfString(d2hResult) << std::endl; + osInfo << "Total Host Walltime: " << walltimeMs / 1000 << " s" << std::endl; + osInfo << "Total GPU Compute 
Time: " + << gpuComputeResult.mean * timings.size() / 1000 << " s" << std::endl; + + // Report warnings if the throughput is bound by other factors than GPU + // Compute Time. + constexpr float kENQUEUE_BOUND_REPORTING_THRESHOLD{0.8F}; + if (enqueueResult.median > + kENQUEUE_BOUND_REPORTING_THRESHOLD * gpuComputeResult.median) { + osWarning << "* Throughput may be bound by Enqueue Time rather than GPU " + "Compute and the GPU may be under-utilized." + << std::endl; + osWarning << " If not already in use, --useCudaGraph (utilize CUDA graphs " + "where possible) may increase the " + "throughput." + << std::endl; + } + if (h2dResult.median >= gpuComputeResult.median) { + osWarning << "* Throughput may be bound by host-to-device transfers for " + "the inputs rather than GPU Compute and " + "the GPU may be under-utilized." + << std::endl; + osWarning << " Add --noDataTransfers flag to disable data transfers." + << std::endl; + } + if (d2hResult.median >= gpuComputeResult.median) { + osWarning << "* Throughput may be bound by device-to-host transfers for " + "the outputs rather than GPU Compute " + "and the GPU may be under-utilized." + << std::endl; + osWarning << " Add --noDataTransfers flag to disable data transfers." + << std::endl; + } + + // Report warnings if the GPU Compute Time is unstable. + constexpr float kUNSTABLE_PERF_REPORTING_THRESHOLD{1.0F}; + if (gpuComputeResult.coeffVar > kUNSTABLE_PERF_REPORTING_THRESHOLD) { + osWarning + << "* GPU compute time is unstable, with coefficient of variance = " + << gpuComputeResult.coeffVar << "%." << std::endl; + osWarning << " If not already in use, locking GPU clock frequency or " + "adding --useSpinWait may improve the " + << "stability." << std::endl; + } + + // Explain what the metrics mean. + osInfo << "Explanations of the performance metrics are printed in the " + "verbose logs." + << std::endl; + printMetricExplanations(osVerbose); + + osInfo << std::endl; +} + +void printPerformanceReport(std::vector const& trace, + const ReportingOptions& reporting, float warmupMs, + int32_t batchSize, std::ostream& osInfo, + std::ostream& osWarning, std::ostream& osVerbose) { + auto const isNotWarmup = [&warmupMs](const InferenceTrace& a) { + return a.computeStart >= warmupMs; + }; + auto const noWarmup = std::find_if(trace.begin(), trace.end(), isNotWarmup); + int32_t const warmups = noWarmup - trace.begin(); + float const benchTime = trace.back().d2hEnd - noWarmup->h2dStart; + // when implicit batch used, batchSize = options.inference.batch, which is + // parsed through --batch + // when explicit batch used, batchSize = options.inference.batch = 0 + // treat inference with explicit batch as a single query and report the + // throughput + batchSize = batchSize ? batchSize : 1; + printProlog(warmups * batchSize, (trace.size() - warmups) * batchSize, + warmupMs, benchTime, osInfo); + + std::vector timings(trace.size() - warmups); + std::transform(noWarmup, trace.end(), timings.begin(), traceToTiming); + printTiming(timings, reporting.avgs, osInfo); + printEpilog(timings, benchTime, reporting.percentile, batchSize, osInfo, + osWarning, osVerbose); + + if (!reporting.exportTimes.empty()) { + exportJSONTrace(trace, reporting.exportTimes); + } +} + +//! Printed format: +//! [ value, ...] +//! value ::= { "start enq : time, "end enq" : time, "start h2d" : time, "end +//! h2d" : time, "start compute" : time, +//! "end compute" : time, "start d2h" : time, "end d2h" : time, +//! "h2d" : time, "compute" : time, +//! 
"d2h" : time, "latency" : time, "end to end" : time } +//! +void exportJSONTrace(std::vector const& trace, + std::string const& fileName) { + std::ofstream os(fileName, std::ofstream::trunc); + os << "[" << std::endl; + char const* sep = " "; + for (auto const& t : trace) { + InferenceTime const it(traceToTiming(t)); + os << sep << "{ "; + sep = ", "; + // clang-format off + os << "\"startEnqMs\" : " << t.enqStart << sep << "\"endEnqMs\" : " << t.enqEnd << sep + << "\"startH2dMs\" : " << t.h2dStart << sep << "\"endH2dMs\" : " << t.h2dEnd << sep + << "\"startComputeMs\" : " << t.computeStart << sep << "\"endComputeMs\" : " << t.computeEnd << sep + << "\"startD2hMs\" : " << t.d2hStart << sep << "\"endD2hMs\" : " << t.d2hEnd << sep + << "\"h2dMs\" : " << it.h2d << sep << "\"computeMs\" : " << it.compute << sep + << "\"d2hMs\" : " << it.d2h << sep << "\"latencyMs\" : " << it.latency() << sep + << "\"endToEndMs\" : " << it.e2e << " }" << std::endl; + // clang-format on + } + os << "]" << std::endl; +} + +void Profiler::reportLayerTime(char const* layerName, float timeMs) noexcept { + if (mIterator == mLayers.end()) { + bool const first = !mLayers.empty() && mLayers.begin()->name == layerName; + mUpdatesCount += mLayers.empty() || first; + if (first) { + mIterator = mLayers.begin(); + } else { + mLayers.emplace_back(); + mLayers.back().name = layerName; + mIterator = mLayers.end() - 1; + } + } + + mIterator->timeMs += timeMs; + ++mIterator; +} + +void Profiler::print(std::ostream& os) const noexcept { + std::string const nameHdr("Layer"); + std::string const timeHdr(" Time (ms)"); + std::string const avgHdr(" Avg. Time (ms)"); + std::string const percentageHdr(" Time %"); + + float const totalTimeMs = getTotalTime(); + + auto const cmpLayer = [](LayerProfile const& a, LayerProfile const& b) { + return a.name.size() < b.name.size(); + }; + auto const longestName = + std::max_element(mLayers.begin(), mLayers.end(), cmpLayer); + auto const nameLength = + std::max(longestName->name.size() + 1, nameHdr.size()); + auto const timeLength = timeHdr.size(); + auto const avgLength = avgHdr.size(); + auto const percentageLength = percentageHdr.size(); + + os << std::endl + << "=== Profile (" << mUpdatesCount << " iterations ) ===" << std::endl + << std::setw(nameLength) << nameHdr << timeHdr << avgHdr << percentageHdr + << std::endl; + + for (auto const& p : mLayers) { + // clang-format off + os << std::setw(nameLength) << p.name << std::setw(timeLength) << std::fixed << std::setprecision(2) << p.timeMs + << std::setw(avgLength) << std::fixed << std::setprecision(4) << p.timeMs / mUpdatesCount + << std::setw(percentageLength) << std::fixed << std::setprecision(1) << p.timeMs / totalTimeMs * 100 + << std::endl; + } + { + os << std::setw(nameLength) << "Total" << std::setw(timeLength) << std::fixed << std::setprecision(2) + << totalTimeMs << std::setw(avgLength) << std::fixed << std::setprecision(4) << totalTimeMs / mUpdatesCount + << std::setw(percentageLength) << std::fixed << std::setprecision(1) << 100.0 << std::endl; + // clang-format on + } + os << std::endl; +} + +void Profiler::exportJSONProfile(std::string const& fileName) const noexcept { + std::ofstream os(fileName, std::ofstream::trunc); + os << "[" << std::endl + << " { \"count\" : " << mUpdatesCount << " }" << std::endl; + + auto const totalTimeMs = getTotalTime(); + + for (auto const& l : mLayers) { + // clang-format off + os << ", {" << " \"name\" : \"" << l.name << "\"" + ", \"timeMs\" : " << l.timeMs + << ", \"averageMs\" : " << l.timeMs / 
mUpdatesCount + << ", \"percentage\" : " << l.timeMs / totalTimeMs * 100 + << " }" << std::endl; + // clang-format on + } + os << "]" << std::endl; +} + +void dumpInputs(nvinfer1::IExecutionContext const& context, + Bindings const& bindings, std::ostream& os) { + os << "Input Tensors:" << std::endl; + bindings.dumpInputs(context, os); +} + +void dumpOutputs(nvinfer1::IExecutionContext const& context, + Bindings const& bindings, std::ostream& os) { + os << "Output Tensors:" << std::endl; + bindings.dumpOutputs(context, os); +} + +void exportJSONOutput(nvinfer1::IExecutionContext const& context, + Bindings const& bindings, std::string const& fileName, + int32_t batch) { + std::ofstream os(fileName, std::ofstream::trunc); + std::string sep = " "; + auto const output = bindings.getOutputBindings(); + os << "[" << std::endl; + for (auto const& binding : output) { + // clang-format off + os << sep << "{ \"name\" : \"" << binding.first << "\"" << std::endl; + sep = ", "; + os << " " << sep << "\"dimensions\" : \""; + bindings.dumpBindingDimensions(binding.second, context, os); + os << "\"" << std::endl; + os << " " << sep << "\"values\" : [ "; + bindings.dumpBindingValues(context, binding.second, os, sep, batch); + os << " ]" << std::endl << " }" << std::endl; + // clang-format on + } + os << "]" << std::endl; +} + +} // namespace sample diff --git a/fastdeploy/backends/tensorrt/common/sampleReporting.h b/fastdeploy/backends/tensorrt/common/sampleReporting.h new file mode 100644 index 0000000000..68b78af9c6 --- /dev/null +++ b/fastdeploy/backends/tensorrt/common/sampleReporting.h @@ -0,0 +1,211 @@ +/* + * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef TRT_SAMPLE_REPORTING_H +#define TRT_SAMPLE_REPORTING_H + +#include +#include + +#include "NvInfer.h" + +#include "sampleOptions.h" +#include "sampleUtils.h" + +namespace sample { + +//! +//! \struct InferenceTime +//! \brief Measurement times in milliseconds +//! +struct InferenceTime { + InferenceTime(float q, float i, float c, float o, float e) + : enq(q), h2d(i), compute(c), d2h(o), e2e(e) {} + + InferenceTime() = default; + InferenceTime(InferenceTime const&) = default; + InferenceTime(InferenceTime&&) = default; + InferenceTime& operator=(InferenceTime const&) = default; + InferenceTime& operator=(InferenceTime&&) = default; + ~InferenceTime() = default; + + float enq{0}; // Enqueue + float h2d{0}; // Host to Device + float compute{0}; // Compute + float d2h{0}; // Device to Host + float e2e{0}; // end to end + + // ideal latency + float latency() const { return h2d + compute + d2h; } +}; + +//! +//! \struct InferenceTrace +//! \brief Measurement points in milliseconds +//! 
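+//! Each field is a millisecond timestamp on a common timeline; for a trace t,
+//! traceToTiming() in sampleReporting.cpp derives the per-query durations,
+//! e.g. compute = t.computeEnd - t.computeStart, and the end-to-end latency
+//! e2e = t.d2hEnd - t.h2dStart (H2D start through D2H completion).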
+struct InferenceTrace { + InferenceTrace(int32_t s, float es, float ee, float is, float ie, float cs, + float ce, float os, float oe) + : stream(s), enqStart(es), enqEnd(ee), h2dStart(is), h2dEnd(ie), + computeStart(cs), computeEnd(ce), d2hStart(os), d2hEnd(oe) {} + + InferenceTrace() = default; + InferenceTrace(InferenceTrace const&) = default; + InferenceTrace(InferenceTrace&&) = default; + InferenceTrace& operator=(InferenceTrace const&) = default; + InferenceTrace& operator=(InferenceTrace&&) = default; + ~InferenceTrace() = default; + + int32_t stream{0}; + float enqStart{0}; + float enqEnd{0}; + float h2dStart{0}; + float h2dEnd{0}; + float computeStart{0}; + float computeEnd{0}; + float d2hStart{0}; + float d2hEnd{0}; +}; + +inline InferenceTime operator+(InferenceTime const& a, InferenceTime const& b) { + return InferenceTime(a.enq + b.enq, a.h2d + b.h2d, a.compute + b.compute, + a.d2h + b.d2h, a.e2e + b.e2e); +} + +inline InferenceTime operator+=(InferenceTime& a, InferenceTime const& b) { + return a = a + b; +} + +//! +//! \struct PerformanceResult +//! \brief Performance result of a performance metric +//! +struct PerformanceResult { + float min{0}; + float max{0}; + float mean{0}; + float median{0}; + float percentile{0}; + float coeffVar{0}; // coefficient of variation +}; + +//! +//! \brief Print benchmarking time and number of traces collected +//! +void printProlog(int32_t warmups, int32_t timings, float warmupMs, + float walltime, std::ostream& os); + +//! +//! \brief Print a timing trace +//! +void printTiming(std::vector const& timings, int32_t runsPerAvg, + std::ostream& os); + +//! +//! \brief Print the performance summary of a trace +//! +void printEpilog(std::vector const& timings, float percentile, + int32_t batchSize, std::ostream& osInfo, + std::ostream& osWarning, std::ostream& osVerbose); + +//! +//! \brief Get the result of a specific performance metric from a trace +//! +PerformanceResult +getPerformanceResult(std::vector const& timings, + std::function metricGetter, + float percentile); + +//! +//! \brief Print the explanations of the performance metrics printed in +//! printEpilog() function. +//! +void printMetricExplanations(std::ostream& os); + +//! +//! \brief Print and summarize a timing trace +//! +void printPerformanceReport(std::vector const& trace, + ReportingOptions const& reporting, float warmupMs, + int32_t batchSize, std::ostream& osInfo, + std::ostream& osWarning, std::ostream& osVerbose); + +//! +//! \brief Export a timing trace to JSON file +//! +void exportJSONTrace(std::vector const& trace, + std::string const& fileName); + +//! +//! \brief Print input tensors to stream +//! +void dumpInputs(nvinfer1::IExecutionContext const& context, + Bindings const& bindings, std::ostream& os); + +//! +//! \brief Print output tensors to stream +//! +void dumpOutputs(nvinfer1::IExecutionContext const& context, + Bindings const& bindings, std::ostream& os); + +//! +//! \brief Export output tensors to JSON file +//! +void exportJSONOutput(nvinfer1::IExecutionContext const& context, + Bindings const& bindings, std::string const& fileName, + int32_t batch); + +//! +//! \struct LayerProfile +//! \brief Layer profile information +//! +struct LayerProfile { + std::string name; + float timeMs{0}; +}; + +//! +//! \class Profiler +//! \brief Collect per-layer profile information, assuming times are reported in +//! the same order +//! 
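+//! Typical usage (a sketch; variable names are illustrative only):
+//!   Profiler profiler;
+//!   context->setProfiler(&profiler);  // nvinfer1::IExecutionContext
+//!   // ... run the enqueue/execute calls to be profiled ...
+//!   profiler.print(std::cout);
+//!   profiler.exportJSONProfile("profile.json");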
+class Profiler : public nvinfer1::IProfiler {
+ public:
+  void reportLayerTime(char const* layerName, float timeMs) noexcept override;
+
+  void print(std::ostream& os) const noexcept;
+
+  //!
+  //! \brief Export a profile to JSON file
+  //!
+  void exportJSONProfile(std::string const& fileName) const noexcept;
+
+ private:
+  float getTotalTime() const noexcept {
+    auto const plusLayerTime = [](float accumulator, LayerProfile const& lp) {
+      return accumulator + lp.timeMs;
+    };
+    return std::accumulate(mLayers.begin(), mLayers.end(), 0.0, plusLayerTime);
+  }
+
+  std::vector<LayerProfile> mLayers;
+  std::vector<LayerProfile>::iterator mIterator{mLayers.begin()};
+  int32_t mUpdatesCount{0};
+};
+
+} // namespace sample
+
+#endif // TRT_SAMPLE_REPORTING_H
diff --git a/fastdeploy/backends/tensorrt/common/sampleUtils.h b/fastdeploy/backends/tensorrt/common/sampleUtils.h
new file mode 100644
index 0000000000..2c6f415bc4
--- /dev/null
+++ b/fastdeploy/backends/tensorrt/common/sampleUtils.h
@@ -0,0 +1,494 @@
+/*
+ * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TRT_SAMPLE_UTILS_H
+#define TRT_SAMPLE_UTILS_H
+
+#include <fstream>
+#include <iostream>
+#include <memory>
+#include <numeric>
+#include <random>
+#include <string>
+#include <vector>
+
+#include <cuda.h>
+#include <cuda_fp16.h>
+
+#include "NvInfer.h"
+
+#include "common.h"
+#include "logger.h"
+#include "sampleDevice.h"
+#include "sampleOptions.h"
+
+namespace sample {
+
+inline int dataTypeSize(nvinfer1::DataType dataType) {
+  switch (dataType) {
+  case nvinfer1::DataType::kINT32:
+  case nvinfer1::DataType::kFLOAT:
+    return 4;
+  case nvinfer1::DataType::kHALF:
+    return 2;
+  case nvinfer1::DataType::kBOOL:
+  case nvinfer1::DataType::kINT8:
+    return 1;
+  }
+  return 0;
+}
+
+template <typename T> inline T roundUp(T m, T n) {
+  return ((m + n - 1) / n) * n;
+}
+
+inline int volume(const nvinfer1::Dims& d) {
+  return std::accumulate(d.d, d.d + d.nbDims, 1, std::multiplies<int>());
+}
+
+//! comps is the number of components in a vector. Ignored if vecDim < 0.
+inline int64_t volume(const nvinfer1::Dims& dims, const nvinfer1::Dims& strides,
+                      int vecDim, int comps, int batch) {
+  int maxNbElems = 1;
+  for (int i = 0; i < dims.nbDims; ++i) {
+    // Get effective length of axis.
+    int d = dims.d[i];
+    // Any dimension is 0, it is an empty tensor.
+    if (d == 0) {
+      return 0;
+    }
+    if (i == vecDim) {
+      d = samplesCommon::divUp(d, comps);
+    }
+    maxNbElems = std::max(maxNbElems, d * strides.d[i]);
+  }
+  return static_cast<int64_t>(maxNbElems) * batch * (vecDim < 0 ? 1 : comps);
+}
+
+inline int64_t volume(nvinfer1::Dims dims, int vecDim, int comps, int batch) {
+  if (vecDim != -1) {
+    dims.d[vecDim] = roundUp(dims.d[vecDim], comps);
+  }
+  return volume(dims) * std::max(batch, 1);
+}
+
+inline nvinfer1::Dims toDims(const std::vector<int32_t>& vec) {
+  int limit = static_cast<int>(nvinfer1::Dims::MAX_DIMS);
+  if (static_cast<int>(vec.size()) > limit) {
+    sample::gLogWarning
+        << "Vector too long, only first 8 elements are used in dimension."
+inline nvinfer1::Dims toDims(const std::vector<int>& vec) {
+  int limit = static_cast<int>(nvinfer1::Dims::MAX_DIMS);
+  if (static_cast<int>(vec.size()) > limit) {
+    sample::gLogWarning
+        << "Vector too long, only first 8 elements are used in dimension."
+        << std::endl;
+  }
+  // Pick first nvinfer1::Dims::MAX_DIMS elements
+  nvinfer1::Dims dims{std::min(static_cast<int>(vec.size()), limit), {}};
+  std::copy_n(vec.begin(), dims.nbDims, std::begin(dims.d));
+  return dims;
+}
+
+template <typename T>
+inline void fillBuffer(void* buffer, int64_t volume, T min, T max) {
+  T* typedBuffer = static_cast<T*>(buffer);
+  std::default_random_engine engine;
+  if (std::is_integral<T>::value) {
+    std::uniform_int_distribution<int32_t> distribution(min, max);
+    auto generator = [&engine, &distribution]() {
+      return static_cast<T>(distribution(engine));
+    };
+    std::generate(typedBuffer, typedBuffer + volume, generator);
+  } else {
+    std::uniform_real_distribution<float> distribution(min, max);
+    auto generator = [&engine, &distribution]() {
+      return static_cast<T>(distribution(engine));
+    };
+    std::generate(typedBuffer, typedBuffer + volume, generator);
+  }
+}
+
+// Specialization needed for custom type __half
+template <typename H>
+inline void fillBufferHalf(void* buffer, int64_t volume, H min, H max) {
+  H* typedBuffer = static_cast<H*>(buffer);
+  std::default_random_engine engine;
+  std::uniform_real_distribution<float> distribution(min, max);
+  auto generator = [&engine, &distribution]() {
+    return static_cast<H>(distribution(engine));
+  };
+  std::generate(typedBuffer, typedBuffer + volume, generator);
+}
+template <>
+inline void fillBuffer<__half>(void* buffer, int64_t volume, __half min,
+                               __half max) {
+  fillBufferHalf(buffer, volume, min, max);
+}
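For example, randomizing a host-side float buffer before benchmarking could look like this (the buffer size is illustrative):

// Sketch: fill a float buffer with uniform values in [-1, 1].
int64_t volume = 1 * 3 * 224 * 224;
std::vector<float> host(volume);
sample::fillBuffer<float>(host.data(), volume, -1.0F, 1.0F);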
1 : spv); + } + curV /= dims.d[dimIndex]; + ASSERT(curV >= 0); + } + + os << sep << typedBuffer[dataOffset]; + sep = separator; + } +} + +inline void loadFromFile(std::string const& fileName, char* dst, size_t size) { + ASSERT(dst); + + std::ifstream file(fileName, std::ios::in | std::ios::binary); + if (file.is_open()) { + file.read(dst, size); + file.close(); + } else { + std::stringstream msg; + msg << "Cannot open file " << fileName << "!"; + throw std::invalid_argument(msg.str()); + } +} + +struct Binding { + bool isInput{false}; + std::unique_ptr buffer; + int64_t volume{0}; + nvinfer1::DataType dataType{nvinfer1::DataType::kFLOAT}; + + void fill(const std::string& fileName) { + loadFromFile(fileName, static_cast(buffer->getHostBuffer()), + buffer->getSize()); + } + + void fill() { + switch (dataType) { + case nvinfer1::DataType::kBOOL: { + fillBuffer(buffer->getHostBuffer(), volume, 0, 1); + break; + } + case nvinfer1::DataType::kINT32: { + fillBuffer(buffer->getHostBuffer(), volume, -128, 127); + break; + } + case nvinfer1::DataType::kINT8: { + fillBuffer(buffer->getHostBuffer(), volume, -128, 127); + break; + } + case nvinfer1::DataType::kFLOAT: { + fillBuffer(buffer->getHostBuffer(), volume, -1.0F, 1.0F); + break; + } + case nvinfer1::DataType::kHALF: { + fillBuffer<__half>(buffer->getHostBuffer(), volume, -1.0F, 1.0F); + break; + } + } + } + + void dump(std::ostream& os, Dims dims, Dims strides, int32_t vectorDim, + int32_t spv, const std::string separator = " ") const { + switch (dataType) { + case nvinfer1::DataType::kBOOL: { + dumpBuffer(buffer->getHostBuffer(), separator, os, dims, strides, + vectorDim, spv); + break; + } + case nvinfer1::DataType::kINT32: { + dumpBuffer(buffer->getHostBuffer(), separator, os, dims, strides, + vectorDim, spv); + break; + } + case nvinfer1::DataType::kINT8: { + dumpBuffer(buffer->getHostBuffer(), separator, os, dims, strides, + vectorDim, spv); + break; + } + case nvinfer1::DataType::kFLOAT: { + dumpBuffer(buffer->getHostBuffer(), separator, os, dims, strides, + vectorDim, spv); + break; + } + case nvinfer1::DataType::kHALF: { + dumpBuffer<__half>(buffer->getHostBuffer(), separator, os, dims, strides, + vectorDim, spv); + break; + } + } + } +}; + +class Bindings { + public: + Bindings() = delete; + explicit Bindings(bool useManaged) : mUseManaged(useManaged) {} + + void addBinding(int b, const std::string& name, bool isInput, int64_t volume, + nvinfer1::DataType dataType, + const std::string& fileName = "") { + while (mBindings.size() <= static_cast(b)) { + mBindings.emplace_back(); + mDevicePointers.emplace_back(); + } + mNames[name] = b; + if (mBindings[b].buffer == nullptr) { + if (mUseManaged) { + mBindings[b].buffer.reset(new UnifiedMirroredBuffer); + } else { + mBindings[b].buffer.reset(new DiscreteMirroredBuffer); + } + } + mBindings[b].isInput = isInput; + // Some memory allocators return nullptr when allocating zero bytes, but + // TensorRT requires a non-null ptr + // even for empty tensors, so allocate a dummy byte. 
+class Bindings {
+ public:
+  Bindings() = delete;
+  explicit Bindings(bool useManaged) : mUseManaged(useManaged) {}
+
+  void addBinding(int b, const std::string& name, bool isInput, int64_t volume,
+                  nvinfer1::DataType dataType,
+                  const std::string& fileName = "") {
+    while (mBindings.size() <= static_cast<size_t>(b)) {
+      mBindings.emplace_back();
+      mDevicePointers.emplace_back();
+    }
+    mNames[name] = b;
+    if (mBindings[b].buffer == nullptr) {
+      if (mUseManaged) {
+        mBindings[b].buffer.reset(new UnifiedMirroredBuffer);
+      } else {
+        mBindings[b].buffer.reset(new DiscreteMirroredBuffer);
+      }
+    }
+    mBindings[b].isInput = isInput;
+    // Some memory allocators return nullptr when allocating zero bytes, but
+    // TensorRT requires a non-null ptr even for empty tensors, so allocate a
+    // dummy byte.
+    if (volume == 0) {
+      mBindings[b].buffer->allocate(1);
+    } else {
+      mBindings[b].buffer->allocate(
+          static_cast<size_t>(volume) *
+          static_cast<size_t>(dataTypeSize(dataType)));
+    }
+    mBindings[b].volume = volume;
+    mBindings[b].dataType = dataType;
+    mDevicePointers[b] = mBindings[b].buffer->getDeviceBuffer();
+    if (isInput) {
+      if (fileName.empty()) {
+        fill(b);
+      } else {
+        fill(b, fileName);
+      }
+    }
+  }
+
+  void** getDeviceBuffers() { return mDevicePointers.data(); }
+
+  void transferInputToDevice(TrtCudaStream& stream) {
+    for (auto& b : mNames) {
+      if (mBindings[b.second].isInput) {
+        mBindings[b.second].buffer->hostToDevice(stream);
+      }
+    }
+  }
+
+  void transferOutputToHost(TrtCudaStream& stream) {
+    for (auto& b : mNames) {
+      if (!mBindings[b.second].isInput) {
+        mBindings[b.second].buffer->deviceToHost(stream);
+      }
+    }
+  }
+
+  void fill(int binding, const std::string& fileName) {
+    mBindings[binding].fill(fileName);
+  }
+
+  void fill(int binding) { mBindings[binding].fill(); }
+
+  void dumpBindingDimensions(int binding,
+                             const nvinfer1::IExecutionContext& context,
+                             std::ostream& os) const {
+    const auto dims = context.getBindingDimensions(binding);
+    // Do not add a newline terminator, because the caller may be outputting a
+    // JSON string.
+    os << dims;
+  }
+
+  void dumpBindingValues(const nvinfer1::IExecutionContext& context,
+                         int binding, std::ostream& os,
+                         const std::string& separator = " ",
+                         int32_t batch = 1) const {
+    Dims dims = context.getBindingDimensions(binding);
+    Dims strides = context.getStrides(binding);
+    int32_t vectorDim = context.getEngine().getBindingVectorizedDim(binding);
+    const int32_t spv =
+        context.getEngine().getBindingComponentsPerElement(binding);
+
+    if (context.getEngine().hasImplicitBatchDimension()) {
+      auto insertN = [](Dims& d, int32_t bs) {
+        const int32_t nbDims = d.nbDims;
+        ASSERT(nbDims < Dims::MAX_DIMS);
+        std::copy_backward(&d.d[0], &d.d[nbDims], &d.d[nbDims + 1]);
+        d.d[0] = bs;
+        d.nbDims = nbDims + 1;
+      };
+      int32_t batchStride = 0;
+      for (int32_t i = 0; i < strides.nbDims; ++i) {
+        if (strides.d[i] * dims.d[i] > batchStride) {
+          batchStride = strides.d[i] * dims.d[i];
+        }
+      }
+      insertN(dims, batch);
+      insertN(strides, batchStride);
+      vectorDim = (vectorDim == -1) ? -1 : vectorDim + 1;
+    }
+
+    mBindings[binding].dump(os, dims, strides, vectorDim, spv, separator);
+  }
+
+  void dumpInputs(const nvinfer1::IExecutionContext& context,
+                  std::ostream& os) const {
+    auto isInput = [](const Binding& b) { return b.isInput; };
+    dumpBindings(context, isInput, os);
+  }
+
+  void dumpOutputs(const nvinfer1::IExecutionContext& context,
+                   std::ostream& os) const {
+    auto isOutput = [](const Binding& b) { return !b.isInput; };
+    dumpBindings(context, isOutput, os);
+  }
+
+  void dumpBindings(const nvinfer1::IExecutionContext& context,
+                    std::ostream& os) const {
+    auto all = [](const Binding& b) { return true; };
+    dumpBindings(context, all, os);
+  }
+
+  void dumpBindings(const nvinfer1::IExecutionContext& context,
+                    bool (*predicate)(const Binding& b),
+                    std::ostream& os) const {
+    for (const auto& n : mNames) {
+      const auto binding = n.second;
+      if (predicate(mBindings[binding])) {
+        os << n.first << ": (";
+        dumpBindingDimensions(binding, context, os);
+        os << ")" << std::endl;
+
+        dumpBindingValues(context, binding, os);
+        os << std::endl;
+      }
+    }
+  }
+
+  std::unordered_map<std::string, int> getInputBindings() const {
+    auto isInput = [](const Binding& b) { return b.isInput; };
+    return getBindings(isInput);
+  }
+
+  std::unordered_map<std::string, int> getOutputBindings() const {
+    auto isOutput = [](const Binding& b) { return !b.isInput; };
+    return getBindings(isOutput);
+  }
+
+  std::unordered_map<std::string, int> getBindings() const {
+    auto all = [](const Binding& b) { return true; };
+    return getBindings(all);
+  }
+
+  std::unordered_map<std::string, int>
+  getBindings(bool (*predicate)(const Binding& b)) const {
+    std::unordered_map<std::string, int> bindings;
+    for (const auto& n : mNames) {
+      const auto binding = n.second;
+      if (predicate(mBindings[binding])) {
+        bindings.insert(n);
+      }
+    }
+    return bindings;
+  }
+
+ private:
+  std::unordered_map<std::string, int> mNames;
+  std::vector<Binding> mBindings;
+  std::vector<void*> mDevicePointers;
+  bool mUseManaged{false};
+};
+
+template <typename T> struct TrtDestroyer {
+  void operator()(T* t) { t->destroy(); }
+};
+
+template <typename T>
+using TrtUniquePtr = std::unique_ptr<T, TrtDestroyer<T>>;
+
+inline bool broadcastIOFormats(const std::vector<IOFormat>& formats,
+                               size_t nbBindings, bool isInput = true) {
+  bool broadcast = formats.size() == 1;
+  bool validFormatsCount = broadcast || (formats.size() == nbBindings);
+  if (!formats.empty() && !validFormatsCount) {
+    if (isInput) {
+      throw std::invalid_argument(
+          "The number of inputIOFormats must match network's inputs or be one "
+          "for broadcasting.");
+    } else {
+      throw std::invalid_argument(
+          "The number of outputIOFormats must match network's outputs or be "
+          "one for broadcasting.");
+    }
+  }
+  return broadcast;
+}
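TrtDestroyer adapts TensorRT objects, which are released via destroy() rather than delete, to std::unique_ptr. A typical use, assuming a logger instance exists:

// Sketch: RAII ownership of a TensorRT builder.
sample::TrtUniquePtr<nvinfer1::IBuilder> builder{
    nvinfer1::createInferBuilder(logger)};
// builder->destroy() is called automatically when 'builder' leaves scope.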
+inline std::vector<char> loadTimingCacheFile(const std::string inFileName) {
+  std::ifstream iFile(inFileName, std::ios::in | std::ios::binary);
+  if (!iFile) {
+    sample::gLogWarning << "Could not read timing cache from: " << inFileName
+                        << ". A new timing cache will be generated and written."
+                        << std::endl;
+    return std::vector<char>();
+  }
+  iFile.seekg(0, std::ifstream::end);
+  size_t fsize = iFile.tellg();
+  iFile.seekg(0, std::ifstream::beg);
+  std::vector<char> content(fsize);
+  iFile.read(content.data(), fsize);
+  iFile.close();
+  sample::gLogInfo << "Loaded " << fsize << " bytes of timing cache from "
+                   << inFileName << std::endl;
+  return content;
+}
+
+inline void saveTimingCacheFile(const std::string outFileName,
+                                const nvinfer1::IHostMemory* blob) {
+  std::ofstream oFile(outFileName, std::ios::out | std::ios::binary);
+  if (!oFile) {
+    sample::gLogWarning << "Could not write timing cache to: " << outFileName
+                        << std::endl;
+    return;
+  }
+  oFile.write(static_cast<const char*>(blob->data()), blob->size());
+  oFile.close();
+  sample::gLogInfo << "Saved " << blob->size() << " bytes of timing cache to "
+                   << outFileName << std::endl;
+}
+
+inline int32_t getCudaDriverVersion() {
+  int32_t version{-1};
+  cudaCheck(cudaDriverGetVersion(&version));
+  return version;
+}
+
+inline int32_t getCudaRuntimeVersion() {
+  int32_t version{-1};
+  cudaCheck(cudaRuntimeGetVersion(&version));
+  return version;
+}
+
+} // namespace sample
+
+#endif // TRT_SAMPLE_UTILS_H
diff --git a/fastdeploy/backends/tensorrt/trt_backend.cc b/fastdeploy/backends/tensorrt/trt_backend.cc
new file mode 100644
index 0000000000..36682badb5
--- /dev/null
+++ b/fastdeploy/backends/tensorrt/trt_backend.cc
@@ -0,0 +1,453 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/backends/tensorrt/trt_backend.h"
+#include "fastdeploy/utils/utils.h"
+#ifdef ENABLE_PADDLE_FRONTEND
+#include "paddle2onnx/converter.h"
+#endif
+
+namespace fastdeploy {
+size_t TrtDataTypeSize(const nvinfer1::DataType& dtype) {
+  if (dtype == nvinfer1::DataType::kFLOAT) {
+    return sizeof(float);
+  } else if (dtype == nvinfer1::DataType::kHALF) {
+    return sizeof(float) / 2;
+  } else if (dtype == nvinfer1::DataType::kINT8) {
+    return sizeof(int8_t);
+  } else if (dtype == nvinfer1::DataType::kINT32) {
+    return sizeof(int32_t);
+  }
+  // kBOOL
+  return sizeof(bool);
+}
+
+FDDataType GetFDDataType(const nvinfer1::DataType& dtype) {
+  if (dtype == nvinfer1::DataType::kFLOAT) {
+    return FDDataType::FP32;
+  } else if (dtype == nvinfer1::DataType::kHALF) {
+    return FDDataType::FP16;
+  } else if (dtype == nvinfer1::DataType::kINT8) {
+    return FDDataType::INT8;
+  } else if (dtype == nvinfer1::DataType::kINT32) {
+    return FDDataType::INT32;
+  }
+  // kBOOL
+  return FDDataType::BOOL;
+}
+
+std::vector<int> toVec(const nvinfer1::Dims& dim) {
+  std::vector<int> out(dim.d, dim.d + dim.nbDims);
+  return out;
+}
+bool TrtBackend::InitFromTrt(const std::string& trt_engine_file) {
+  if (initialized_) {
+    FDERROR << "TrtBackend is already initialized, cannot initialize again."
+            << std::endl;
+    return false;
+  }
+  std::ifstream fin(trt_engine_file, std::ios::binary | std::ios::in);
+  if (!fin) {
+    FDERROR << "Failed to open TensorRT engine file " << trt_engine_file
+            << std::endl;
+    return false;
+  }
+  fin.seekg(0, std::ios::end);
+  std::string engine_buffer;
+  engine_buffer.resize(fin.tellg());
+  fin.seekg(0, std::ios::beg);
+  fin.read(&(engine_buffer.at(0)), engine_buffer.size());
+  fin.close();
+  SampleUniquePtr<nvinfer1::IRuntime> runtime{
+      createInferRuntime(sample::gLogger.getTRTLogger())};
+  if (!runtime) {
+    FDERROR << "Failed to call createInferRuntime()." << std::endl;
+    return false;
+  }
+  engine_ = std::shared_ptr<nvinfer1::ICudaEngine>(
+      runtime->deserializeCudaEngine(engine_buffer.data(),
+                                     engine_buffer.size()),
+      samplesCommon::InferDeleter());
+  if (!engine_) {
+    FDERROR << "Failed to call deserializeCudaEngine()." << std::endl;
+    return false;
+  }
+
+  context_ = std::shared_ptr<nvinfer1::IExecutionContext>(
+      engine_->createExecutionContext());
+  FDASSERT(cudaStreamCreate(&stream_) == 0,
+           "[ERROR] Error occurred while calling cudaStreamCreate().");
+  GetInputOutputInfo();
+  initialized_ = true;
+  return true;
+}
+
+bool TrtBackend::InitFromPaddle(const std::string& model_file,
+                                const std::string& params_file,
+                                const TrtBackendOption& option, bool verbose) {
+  if (initialized_) {
+    FDERROR << "TrtBackend is already initialized, cannot initialize again."
+            << std::endl;
+    return false;
+  }
+
+#ifdef ENABLE_PADDLE_FRONTEND
+  char* model_content_ptr;
+  int model_content_size = 0;
+  if (!paddle2onnx::Export(model_file.c_str(), params_file.c_str(),
+                           &model_content_ptr, &model_content_size, 11, true,
+                           verbose, true, true, true)) {
+    FDERROR << "Error occurred while exporting the PaddlePaddle model to ONNX."
+            << std::endl;
+    return false;
+  }
+  std::string onnx_model_proto(model_content_ptr,
+                               model_content_ptr + model_content_size);
+  delete model_content_ptr;
+  model_content_ptr = nullptr;
+  return InitFromOnnx(onnx_model_proto, option, true);
+#else
+  FDERROR << "FastDeploy was not compiled with the PaddlePaddle frontend; "
+             "try calling `InitFromOnnx` instead."
+          << std::endl;
+  return false;
+#endif
+}
+bool TrtBackend::InitFromOnnx(const std::string& model_file,
+                              const TrtBackendOption& option,
+                              bool from_memory_buffer) {
+  if (initialized_) {
+    FDERROR << "TrtBackend is already initialized, cannot initialize again."
+            << std::endl;
+    return false;
+  }
+  cudaSetDevice(option.gpu_id);
+
+  if (option.serialize_file != "") {
+    std::ifstream fin(option.serialize_file, std::ios::binary | std::ios::in);
+    if (fin) {
+      FDLogger() << "Detected a serialized TensorRT engine file in "
+                 << option.serialize_file << ", will load it directly."
+                 << std::endl;
+      fin.close();
+      return InitFromTrt(option.serialize_file);
+    }
+  }
+
+  std::string onnx_content = "";
+  if (!from_memory_buffer) {
+    std::ifstream fin(model_file.c_str(), std::ios::binary | std::ios::in);
+    if (!fin) {
+      FDERROR << "[ERROR] Failed to open ONNX model file: " << model_file
+              << std::endl;
+      return false;
+    }
+    fin.seekg(0, std::ios::end);
+    onnx_content.resize(fin.tellg());
+    fin.seekg(0, std::ios::beg);
+    fin.read(&(onnx_content.at(0)), onnx_content.size());
+    fin.close();
+  } else {
+    onnx_content = model_file;
+  }
+
+  if (!CreateTrtEngine(onnx_content, option)) {
+    return false;
+  }
+
+  context_ = std::shared_ptr<nvinfer1::IExecutionContext>(
+      engine_->createExecutionContext());
+  FDASSERT(cudaStreamCreate(&stream_) == 0,
+           "[ERROR] Error occurred while calling cudaStreamCreate().");
+  GetInputOutputInfo();
+  initialized_ = true;
+  return true;
+}
+
+bool TrtBackend::Infer(std::vector<FDTensor>& inputs,
+                       std::vector<FDTensor>* outputs) {
+  AllocateBufferInDynamicShape(inputs, outputs);
+  for (size_t i = 0; i < inputs.size(); ++i) {
+    if (inputs[i].dtype == FDDataType::INT64) {
+      // TensorRT does not support INT64 inputs; narrow them to INT32 on the
+      // host before the H2D copy (hence Nbytes() / 2).
+      int64_t* data = static_cast<int64_t*>(inputs[i].Data());
+      std::vector<int32_t> casted_data(data, data + inputs[i].Numel());
+      FDASSERT(cudaMemcpyAsync(inputs_buffer_[inputs[i].name].data(),
+                               static_cast<void*>(casted_data.data()),
+                               inputs[i].Nbytes() / 2, cudaMemcpyHostToDevice,
+                               stream_) == 0,
+               "[ERROR] Error occurred while copying memory from CPU to GPU.");
+    } else {
+      FDASSERT(cudaMemcpyAsync(inputs_buffer_[inputs[i].name].data(),
+                               inputs[i].Data(), inputs[i].Nbytes(),
+                               cudaMemcpyHostToDevice, stream_) == 0,
+               "[ERROR] Error occurred while copying memory from CPU to GPU.");
+    }
+    //    FDASSERT(cudaMemcpy(inputs_buffer_[inputs[i].name].data(),
+    //                        inputs[i].GetData(), inputs[i].Nbytes(),
+    //                        cudaMemcpyHostToDevice) == 0,
+    //             "[ERROR] Error occurs while copy memory from CPU to GPU.");
+  }
+  if (!context_->enqueueV2(bindings_.data(), stream_, nullptr)) {
+    FDERROR << "Failed to Infer with TensorRT." << std::endl;
+    return false;
+  }
+  for (size_t i = 0; i < outputs->size(); ++i) {
+    FDASSERT(cudaMemcpyAsync((*outputs)[i].Data(),
+                             outputs_buffer_[(*outputs)[i].name].data(),
+                             (*outputs)[i].Nbytes(), cudaMemcpyDeviceToHost,
+                             stream_) == 0,
+             "[ERROR] Error occurred while copying memory from GPU to CPU.");
+    //    FDASSERT(cudaMemcpy((*outputs)[i].data.data(),
+    //                        outputs_buffer_[(*outputs)[i].name].data(),
+    //                        (*outputs)[i].Nbytes(),
+    //                        cudaMemcpyDeviceToHost) == 0,
+    //             "[ERROR] Error occurs while copy memory from GPU to CPU.");
+  }
+  //  FDASSERT(cudaStreamSynchronize(stream_) == 0,
+  //           "[ERROR] Error occurs while calling cudaStreamSynchronize().");
+  return true;
+}
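End to end, a caller drives the backend roughly like this (the tensor name and shape are illustrative and must match the model):

// Sketch: one inference through an initialized TrtBackend.
fastdeploy::TrtBackend backend;
backend.InitFromOnnx("model.onnx", fastdeploy::TrtBackendOption());
std::vector<fastdeploy::FDTensor> inputs(1), outputs;
inputs[0].Allocate({1, 3, 224, 224}, fastdeploy::FDDataType::FP32, "x");
backend.Infer(inputs, &outputs);  // H2D copy, enqueueV2, D2H copy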
+void TrtBackend::GetInputOutputInfo() {
+  inputs_desc_.clear();
+  outputs_desc_.clear();
+  auto num_binds = engine_->getNbBindings();
+  for (auto i = 0; i < num_binds; ++i) {
+    std::string name = std::string(engine_->getBindingName(i));
+    auto shape = toVec(engine_->getBindingDimensions(i));
+    auto dtype = engine_->getBindingDataType(i);
+    if (engine_->bindingIsInput(i)) {
+      inputs_desc_.emplace_back(TrtValueInfo{name, shape, dtype});
+      inputs_buffer_[name] = DeviceBuffer(dtype);
+    } else {
+      outputs_desc_.emplace_back(TrtValueInfo{name, shape, dtype});
+      outputs_buffer_[name] = DeviceBuffer(dtype);
+    }
+  }
+  bindings_.resize(num_binds);
+}
+
+void TrtBackend::AllocateBufferInDynamicShape(
+    const std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs) {
+  for (const auto& item : inputs) {
+    auto idx = engine_->getBindingIndex(item.name.c_str());
+    std::vector<int> shape(item.shape.begin(), item.shape.end());
+    auto dims = sample::toDims(shape);
+    context_->setBindingDimensions(idx, dims);
+    if (item.Nbytes() > inputs_buffer_[item.name].nbBytes()) {
+      inputs_buffer_[item.name].resize(dims);
+      bindings_[idx] = inputs_buffer_[item.name].data();
+    }
+  }
+  if (outputs->size() != outputs_desc_.size()) {
+    outputs->resize(outputs_desc_.size());
+  }
+  for (size_t i = 0; i < outputs_desc_.size(); ++i) {
+    auto idx = engine_->getBindingIndex(outputs_desc_[i].name.c_str());
+    auto output_dims = context_->getBindingDimensions(idx);
+    (*outputs)[i].dtype = GetFDDataType(outputs_desc_[i].dtype);
+    (*outputs)[i].shape.assign(output_dims.d,
+                               output_dims.d + output_dims.nbDims);
+    (*outputs)[i].name = outputs_desc_[i].name;
+    (*outputs)[i].data.resize(volume(output_dims) *
+                              TrtDataTypeSize(outputs_desc_[i].dtype));
+    if ((*outputs)[i].Nbytes() >
+        outputs_buffer_[outputs_desc_[i].name].nbBytes()) {
+      outputs_buffer_[outputs_desc_[i].name].resize(output_dims);
+      bindings_[idx] = outputs_buffer_[outputs_desc_[i].name].data();
+    }
+  }
+}
+
+bool TrtBackend::CreateTrtEngine(const std::string& onnx_model,
+                                 const TrtBackendOption& option) {
+  const auto explicitBatch =
+      1U << static_cast<uint32_t>(
+          nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
+
+  auto builder = SampleUniquePtr<nvinfer1::IBuilder>(
+      nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger()));
+  if (!builder) {
+    FDERROR << "Failed to call createInferBuilder()." << std::endl;
+    return false;
+  }
+  auto network = SampleUniquePtr<nvinfer1::INetworkDefinition>(
+      builder->createNetworkV2(explicitBatch));
+  if (!network) {
+    FDERROR << "Failed to call createNetworkV2()." << std::endl;
+    return false;
+  }
+  auto config = SampleUniquePtr<nvinfer1::IBuilderConfig>(
+      builder->createBuilderConfig());
+  if (!config) {
+    FDERROR << "Failed to call createBuilderConfig()." << std::endl;
+    return false;
+  }
+
+  if (option.enable_fp16) {
+    if (!builder->platformHasFastFp16()) {
+      FDLogger() << "[WARN] FP16 is not supported on the current GPU, "
+                    "will use FP32 instead."
+                 << std::endl;
+    } else {
+      config->setFlag(nvinfer1::BuilderFlag::kFP16);
+    }
+  }
+
+  auto parser = SampleUniquePtr<nvonnxparser::IParser>(
+      nvonnxparser::createParser(*network, sample::gLogger.getTRTLogger()));
+  if (!parser) {
+    FDERROR << "Failed to call createParser()." << std::endl;
+    return false;
+  }
+  if (!parser->parse(onnx_model.data(), onnx_model.size())) {
+    FDERROR << "Failed to parse ONNX model by TensorRT." << std::endl;
+    return false;
+  }
+
+  FDLogger() << "Start building TensorRT engine..." << std::endl;
+  builder->setMaxBatchSize(option.max_batch_size);
+
+  config->setMaxWorkspaceSize(option.max_workspace_size);
+
+  if (option.fixed_shape.size() > 0) {
+    auto profile = builder->createOptimizationProfile();
+    for (auto& item : option.fixed_shape) {
+      FDASSERT(profile->setDimensions(item.first.c_str(),
+                                      nvinfer1::OptProfileSelector::kMIN,
+                                      sample::toDims(item.second)),
+               "[TrtBackend] Failed to set min_shape for input: " + item.first +
+                   " in TrtBackend.");
+      FDASSERT(profile->setDimensions(item.first.c_str(),
+                                      nvinfer1::OptProfileSelector::kOPT,
+                                      sample::toDims(item.second)),
+               "[TrtBackend] Failed to set opt_shape for input: " + item.first +
+                   " in TrtBackend.");
+      FDASSERT(profile->setDimensions(item.first.c_str(),
+                                      nvinfer1::OptProfileSelector::kMAX,
+                                      sample::toDims(item.second)),
+               "[TrtBackend] Failed to set max_shape for input: " + item.first +
+                   " in TrtBackend.");
+    }
+    config->addOptimizationProfile(profile);
+  } else if (option.max_shape.size() > 0) {
+    auto profile = builder->createOptimizationProfile();
+    FDASSERT(option.max_shape.size() == option.min_shape.size() &&
+                 option.min_shape.size() == option.opt_shape.size(),
+             "[TrtBackend] Sizes of max_shape/opt_shape/min_shape in "
+             "TrtBackendOption should be the same.");
+    for (const auto& item : option.min_shape) {
+      // set min shape
+      FDASSERT(profile->setDimensions(item.first.c_str(),
+                                      nvinfer1::OptProfileSelector::kMIN,
+                                      sample::toDims(item.second)),
+               "[TrtBackend] Failed to set min_shape for input: " + item.first +
+                   " in TrtBackend.");
+
+      // set optimization shape
+      auto iter = option.opt_shape.find(item.first);
+      FDASSERT(iter != option.opt_shape.end(),
+               "[TrtBackend] Cannot find input name: " + item.first +
+                   " in TrtBackendOption::opt_shape.");
+      FDASSERT(profile->setDimensions(item.first.c_str(),
+                                      nvinfer1::OptProfileSelector::kOPT,
+                                      sample::toDims(iter->second)),
+               "[TrtBackend] Failed to set opt_shape for input: " + item.first +
+                   " in TrtBackend.");
+      // set max shape
+      iter = option.max_shape.find(item.first);
+      FDASSERT(iter != option.max_shape.end(),
+               "[TrtBackend] Cannot find input name: " + item.first +
+                   " in TrtBackendOption::max_shape.");
+      FDASSERT(profile->setDimensions(item.first.c_str(),
+                                      nvinfer1::OptProfileSelector::kMAX,
+                                      sample::toDims(iter->second)),
+               "[TrtBackend] Failed to set max_shape for input: " + item.first +
+                   " in TrtBackend.");
+    }
+    config->addOptimizationProfile(profile);
+  }
+
+  SampleUniquePtr<nvinfer1::IHostMemory> plan{
+      builder->buildSerializedNetwork(*network, *config)};
+  if (!plan) {
+    FDERROR << "Failed to call buildSerializedNetwork()." << std::endl;
+    return false;
+  }
+
+  SampleUniquePtr<nvinfer1::IRuntime> runtime{
+      createInferRuntime(sample::gLogger.getTRTLogger())};
+  if (!runtime) {
+    FDERROR << "Failed to call createInferRuntime()." << std::endl;
+    return false;
+  }
+
+  engine_ = std::shared_ptr<nvinfer1::ICudaEngine>(
+      runtime->deserializeCudaEngine(plan->data(), plan->size()),
+      samplesCommon::InferDeleter());
+  if (!engine_) {
+    FDERROR << "Failed to call deserializeCudaEngine()." << std::endl;
+    return false;
+  }
+
+  FDLogger() << "TensorRT engine is built successfully." << std::endl;
+  if (option.serialize_file != "") {
+    FDLogger() << "Serializing TensorRT engine to local file "
+               << option.serialize_file << "." << std::endl;
+    // The serialized plan is binary data, so the file must be opened in
+    // binary mode.
+    std::ofstream engine_file(option.serialize_file.c_str(),
+                              std::ios::binary | std::ios::out);
+    if (!engine_file) {
+      FDERROR << "Failed to open " << option.serialize_file << " to write."
+              << std::endl;
+      return false;
+    }
+    engine_file.write(static_cast<char*>(plan->data()), plan->size());
+    engine_file.close();
+    FDLogger() << "TensorRT engine is serialized to local file "
+               << option.serialize_file
+               << "; we can load this model from the serialized engine "
+                  "directly next time."
+               << std::endl;
+  }
+  return true;
+}
+
+TensorInfo TrtBackend::GetInputInfo(int index) {
+  FDASSERT(index < NumInputs(),
+           "The index:" + std::to_string(index) +
+               " should be less than the number of inputs:" +
+               std::to_string(NumInputs()) + ".");
+  TensorInfo info;
+  info.name = inputs_desc_[index].name;
+  info.shape.assign(inputs_desc_[index].shape.begin(),
+                    inputs_desc_[index].shape.end());
+  info.dtype = GetFDDataType(inputs_desc_[index].dtype);
+  return info;
+}
+
+TensorInfo TrtBackend::GetOutputInfo(int index) {
+  FDASSERT(index < NumOutputs(),
+           "The index:" + std::to_string(index) +
+               " should be less than the number of outputs:" +
+               std::to_string(NumOutputs()) + ".");
+  TensorInfo info;
+  info.name = outputs_desc_[index].name;
+  info.shape.assign(outputs_desc_[index].shape.begin(),
+                    outputs_desc_[index].shape.end());
+  info.dtype = GetFDDataType(outputs_desc_[index].dtype);
+  return info;
+}
+} // namespace fastdeploy
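Since building an engine is expensive, serialize_file makes that cost a one-time event. A sketch of the intended workflow (paths are illustrative):

// First run builds from ONNX and writes the engine; later runs load it.
fastdeploy::TrtBackendOption option;
option.serialize_file = "model.trt";
fastdeploy::TrtBackend backend;
backend.InitFromOnnx("model.onnx", option);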
diff --git a/fastdeploy/backends/tensorrt/trt_backend.h b/fastdeploy/backends/tensorrt/trt_backend.h
new file mode 100644
index 0000000000..3b77c8bc24
--- /dev/null
+++ b/fastdeploy/backends/tensorrt/trt_backend.h
@@ -0,0 +1,98 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <iostream>
+#include <map>
+#include <string>
+#include <vector>
+
+#include "fastdeploy/backends/backend.h"
+
+#include "fastdeploy/backends/tensorrt/common/argsParser.h"
+#include "fastdeploy/backends/tensorrt/common/buffers.h"
+#include "fastdeploy/backends/tensorrt/common/common.h"
+#include "fastdeploy/backends/tensorrt/common/logger.h"
+#include "fastdeploy/backends/tensorrt/common/parserOnnxConfig.h"
+#include "fastdeploy/backends/tensorrt/common/sampleUtils.h"
+
+#include "NvInfer.h"
+#include <cuda_runtime_api.h>
+
+namespace fastdeploy {
+using namespace samplesCommon;
+
+struct TrtValueInfo {
+  std::string name;
+  std::vector<int> shape;
+  nvinfer1::DataType dtype;
+};
+
+struct TrtBackendOption {
+  int gpu_id = 0;
+  bool enable_fp16 = false;
+  bool enable_int8 = false;
+  size_t max_batch_size = 32;
+  size_t max_workspace_size = 1 << 30;
+  std::map<std::string, std::vector<int32_t>> fixed_shape;
+  std::map<std::string, std::vector<int32_t>> max_shape;
+  std::map<std::string, std::vector<int32_t>> min_shape;
+  std::map<std::string, std::vector<int32_t>> opt_shape;
+  std::string serialize_file = "";
+};
+
+std::vector<int> toVec(const nvinfer1::Dims& dim);
+size_t TrtDataTypeSize(const nvinfer1::DataType& dtype);
+FDDataType GetFDDataType(const nvinfer1::DataType& dtype);
+
+class TrtBackend : public BaseBackend {
+ public:
+  TrtBackend() : engine_(nullptr), context_(nullptr) {}
+  void BuildOption(const TrtBackendOption& option);
+
+  bool InitFromPaddle(const std::string& model_file,
+                      const std::string& params_file,
+                      const TrtBackendOption& option = TrtBackendOption(),
+                      bool verbose = false);
+  bool InitFromOnnx(const std::string& model_file,
+                    const TrtBackendOption& option = TrtBackendOption(),
+                    bool from_memory_buffer = false);
+  bool InitFromTrt(const std::string& trt_engine_file);
+
+  bool Infer(std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs);
+
+  int NumInputs() const { return inputs_desc_.size(); }
+  int NumOutputs() const { return outputs_desc_.size(); }
+  TensorInfo GetInputInfo(int index);
+  TensorInfo GetOutputInfo(int index);
+
+ private:
+  std::shared_ptr<nvinfer1::ICudaEngine> engine_;
+  std::shared_ptr<nvinfer1::IExecutionContext> context_;
+  cudaStream_t stream_{};
+  std::vector<void*> bindings_;
+  std::vector<TrtValueInfo> inputs_desc_;
+  std::vector<TrtValueInfo> outputs_desc_;
+  std::map<std::string, DeviceBuffer> inputs_buffer_;
+  std::map<std::string, DeviceBuffer> outputs_buffer_;
+
+  void GetInputOutputInfo();
+  void AllocateBufferInDynamicShape(const std::vector<FDTensor>& inputs,
+                                    std::vector<FDTensor>* outputs);
+  bool CreateTrtEngine(const std::string& onnx_model,
+                       const TrtBackendOption& option);
+};
+
+} // namespace fastdeploy
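For models with dynamic shapes, min/opt/max together define the TensorRT optimization profile built in CreateTrtEngine(). A sketch for a dynamic batch dimension (input name and shapes are illustrative):

fastdeploy::TrtBackendOption option;
option.min_shape["x"] = {1, 3, 224, 224};  // smallest shape to support
option.opt_shape["x"] = {4, 3, 224, 224};  // shape TensorRT optimizes for
option.max_shape["x"] = {8, 3, 224, 224};  // largest shape to support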
diff --git a/fastdeploy/core/config.h.in b/fastdeploy/core/config.h.in
new file mode 100644
index 0000000000..48854e1ffe
--- /dev/null
+++ b/fastdeploy/core/config.h.in
@@ -0,0 +1,50 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+
+#ifndef FASTDEPLOY_DEBUG
+#cmakedefine FASTDEPLOY_DEBUG
+#endif
+
+#ifndef FASTDEPLOY_LIB
+#cmakedefine FASTDEPLOY_LIB
+#endif
+
+#ifndef ENABLE_PADDLE_FRONTEND
+#cmakedefine ENABLE_PADDLE_FRONTEND
+#endif
+
+#ifndef ENABLE_ORT_BACKEND
+#cmakedefine ENABLE_ORT_BACKEND
+#endif
+
+#ifndef WITH_GPU
+#cmakedefine WITH_GPU
+#endif
+
+#ifndef ENABLE_TRT_BACKEND
+#cmakedefine ENABLE_TRT_BACKEND
+#endif
+
+#ifndef ENABLE_VISION
+#cmakedefine ENABLE_VISION
+#endif
+
+#ifndef ENABLE_OPENCV_CUDA
+#cmakedefine ENABLE_OPENCV_CUDA
+#endif
+
+#ifndef ENABLE_VISION_VISUALIZE
+#cmakedefine ENABLE_VISION_VISUALIZE
+#endif
diff --git a/fastdeploy/core/fd_tensor.cc b/fastdeploy/core/fd_tensor.cc
new file mode 100644
index 0000000000..97b33dad58
--- /dev/null
+++ b/fastdeploy/core/fd_tensor.cc
@@ -0,0 +1,127 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/core/fd_tensor.h"
+#include "fastdeploy/utils/utils.h"
+
+#ifdef WITH_GPU
+#include <cuda_runtime_api.h>
+#endif
+
+namespace fastdeploy {
+
+void* FDTensor::MutableData() {
+  if (external_data_ptr != nullptr) {
+    return external_data_ptr;
+  }
+  return data.data();
+}
+
+void* FDTensor::Data() {
+  if (external_data_ptr != nullptr) {
+    if (device == Device::GPU) {
+#ifdef WITH_GPU
+      // need to copy cuda mem to cpu first
+      temporary_cpu_buffer.resize(Nbytes());
+      FDASSERT(cudaMemcpy(temporary_cpu_buffer.data(), external_data_ptr,
+                          Nbytes(), cudaMemcpyDeviceToHost) == 0,
+               "[ERROR] Error occurred while copying memory from GPU to CPU");
+      return temporary_cpu_buffer.data();
+#else
+      FDASSERT(false,
+               "FastDeploy was not compiled with -DWITH_GPU=ON, so this is "
+               "an unexpected problem.");
+#endif
+    } else {
+      return external_data_ptr;
+    }
+  }
+  return data.data();
+}
+
+void FDTensor::SetExternalData(const std::vector<int64_t>& new_shape,
+                               const FDDataType& data_type, void* data_buffer) {
+  dtype = data_type;
+  shape.assign(new_shape.begin(), new_shape.end());
+  external_data_ptr = data_buffer;
+}
+
+void FDTensor::Allocate(const std::vector<int64_t>& new_shape,
+                        const FDDataType& data_type,
+                        const std::string& tensor_name) {
+  dtype = data_type;
+  name = tensor_name;
+  shape.assign(new_shape.begin(), new_shape.end());
+  int unit = FDDataTypeSize(data_type);
+  int total_size =
+      std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int>());
+  data.resize(total_size * unit);
+}
+
+int FDTensor::Nbytes() const { return Numel() * FDDataTypeSize(dtype); }
+
+int FDTensor::Numel() const {
+  return std::accumulate(shape.begin(), shape.end(), 1,
+                         std::multiplies<int>());
+}
+
+template <typename T>
+void CalculateStatisInfo(void* src_ptr, int size, double* mean, double* max,
+                         double* min) {
+  T* ptr = static_cast<T*>(src_ptr);
+  *mean = 0;
+  *max = -99999999;
+  *min = 99999999;
+  for (int i = 0; i < size; ++i) {
+    if (*(ptr + i) > *max) {
+      *max = *(ptr + i);
+    }
+    if (*(ptr + i) < *min) {
+      *min = *(ptr + i);
+    }
+    *mean += *(ptr + i);
+  }
+  *mean = *mean / size;
+}
+void FDTensor::PrintInfo(const std::string& prefix) {
+  double mean = 0;
+  double max = -99999999;
+  double min = 99999999;
+  if (dtype == FDDataType::FP32) {
+    CalculateStatisInfo<float>(Data(), Numel(), &mean, &max, &min);
+  } else if (dtype == FDDataType::FP64) {
+    CalculateStatisInfo<double>(Data(), Numel(), &mean, &max, &min);
+  } else if (dtype == FDDataType::INT8) {
+    CalculateStatisInfo<int8_t>(Data(), Numel(), &mean, &max, &min);
+  } else if (dtype == FDDataType::UINT8) {
+    CalculateStatisInfo<uint8_t>(Data(), Numel(), &mean, &max, &min);
+  } else if (dtype == FDDataType::INT32) {
+    CalculateStatisInfo<int32_t>(Data(), Numel(), &mean, &max, &min);
+  } else if (dtype == FDDataType::INT64) {
+    CalculateStatisInfo<int64_t>(Data(), Numel(), &mean, &max, &min);
+  } else {
+    FDASSERT(false,
+             "PrintInfo function doesn't support the current dtype; this "
+             "function needs to be extended.");
+  }
+  std::cout << prefix << ": shape=";
+  for (size_t i = 0; i < shape.size(); ++i) {
+    std::cout << shape[i] << " ";
+  }
+  std::cout << ", dtype=" << FDDataTypeStr(dtype) << ", mean=" << mean
+            << ", max=" << max << ", min=" << min << std::endl;
+}
+
+FDTensor::FDTensor(const std::string& tensor_name) { name = tensor_name; }
+} // namespace fastdeploy
diff --git a/fastdeploy/core/fd_tensor.h b/fastdeploy/core/fd_tensor.h
new file mode 100644
index 0000000000..a00ff87fdf
--- /dev/null
+++ b/fastdeploy/core/fd_tensor.h
@@ -0,0 +1,84 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+
+#include <cstdint>
+#include <string>
+#include <vector>
+
+#include "fastdeploy/core/fd_type.h"
+
+namespace fastdeploy {
+
+struct FASTDEPLOY_DECL FDTensor {
+  std::vector<int8_t> data;
+  std::vector<int64_t> shape;
+  std::string name = "";
+  FDDataType dtype;
+
+  // This is used to skip the memory copy step: external_data_ptr will point
+  // to user-allocated memory, and the user has to maintain that memory
+  // (allocate and release it).
+  void* external_data_ptr = nullptr;
+  // The internal `data` buffer is always on the CPU. Sometimes the external
+  // data is on the GPU and we are going to use the GPU to run the model, so
+  // we can skip the data transfer, which may improve efficiency.
+  Device device = Device::CPU;
+
+  // If the external data is not on the CPU, we use this temporary buffer
+  // to transfer the data to the CPU when we need to access it there.
+  std::vector<int8_t> temporary_cpu_buffer;
+
+  // Get data buffer pointer
+  void* MutableData();
+
+  // Use this to get the tensor data for processing.
+  // Since the most common scenario is processing data on the CPU, this
+  // function will return a pointer to a CPU memory buffer.
+  // If the original data is on another device, it is copied to the CPU and
+  // stored in `temporary_cpu_buffer`.
+  void* Data();
+
+  // Set a user memory buffer for the tensor. The memory is managed by the
+  // user itself, but the tensor will share it, so take care with the
+  // buffer's lifetime.
+  void SetExternalData(const std::vector<int64_t>& new_shape,
+                       const FDDataType& data_type, void* data_buffer);
+
+  // Initialize the tensor: set its attributes and allocate the CPU memory
+  // buffer.
+  void Allocate(const std::vector<int64_t>& new_shape,
+                const FDDataType& data_type,
+                const std::string& tensor_name = "");
+
+  // Total size of tensor memory buffer in bytes
+  int Nbytes() const;
+
+  // Total number of elements in this tensor
+  int Numel() const;
+
+  // Debug function: print shape, dtype, mean, max and min; `prefix` is also
+  // printed as a tag.
+  void PrintInfo(const std::string& prefix = "TensorInfo: ");
+
+  FDTensor() {}
+  explicit FDTensor(const std::string& tensor_name);
+};
+
+} // namespace fastdeploy
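SetExternalData gives zero-copy input handling; the tensor only borrows the buffer. A sketch (shape and storage are illustrative):

// Caller-owned storage wrapped by an FDTensor without copying.
std::vector<float> image(1 * 3 * 224 * 224);
fastdeploy::FDTensor tensor("x");
tensor.SetExternalData({1, 3, 224, 224}, fastdeploy::FDDataType::FP32,
                       image.data());
// tensor.Data() now returns image.data(); FDTensor never frees it.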
diff --git a/fastdeploy/core/fd_type.cc b/fastdeploy/core/fd_type.cc
new file mode 100644
index 0000000000..b66cabeb8b
--- /dev/null
+++ b/fastdeploy/core/fd_type.cc
@@ -0,0 +1,122 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/core/fd_type.h"
+#include "fastdeploy/utils/utils.h"
+
+namespace fastdeploy {
+
+int FDDataTypeSize(FDDataType data_type) {
+  FDASSERT(data_type != FDDataType::FP16, "Float16 is not supported.");
+  if (data_type == FDDataType::BOOL) {
+    return sizeof(bool);
+  } else if (data_type == FDDataType::INT16) {
+    return sizeof(int16_t);
+  } else if (data_type == FDDataType::INT32) {
+    return sizeof(int32_t);
+  } else if (data_type == FDDataType::INT64) {
+    return sizeof(int64_t);
+  } else if (data_type == FDDataType::FP32) {
+    return sizeof(float);
+  } else if (data_type == FDDataType::FP64) {
+    return sizeof(double);
+  } else if (data_type == FDDataType::UINT8) {
+    return sizeof(uint8_t);
+  } else {
+    FDASSERT(false, "Unexpected data type: " + FDDataTypeStr(data_type));
+  }
+  return -1;
+}
+
+std::string FDDataTypeStr(FDDataType data_type) {
+  FDASSERT(data_type != FDDataType::FP16, "Float16 is not supported.");
+  if (data_type == FDDataType::BOOL) {
+    return "bool";
+  } else if (data_type == FDDataType::INT16) {
+    return "int16";
+  } else if (data_type == FDDataType::INT32) {
+    return "int32";
+  } else if (data_type == FDDataType::INT64) {
+    return "int64";
+  } else if (data_type == FDDataType::FP16) {
+    return "float16";
+  } else if (data_type == FDDataType::FP32) {
+    return "float32";
+  } else if (data_type == FDDataType::FP64) {
+    return "float64";
+  } else if (data_type == FDDataType::UINT8) {
+    return "uint8";
+  } else if (data_type == FDDataType::INT8) {
+    return "int8";
+  } else {
+    // Do not call FDDataTypeStr() here; that would recurse infinitely for an
+    // unexpected enum value.
+    FDASSERT(false, "Unexpected data type: " +
+                        std::to_string(static_cast<int>(data_type)));
+  }
+  return "UNKNOWN!";
+}
+
+std::string Str(Device& d) {
+  std::string out;
+  switch (d) {
+  case Device::DEFAULT:
+    out = "Device::DEFAULT";
+    break;
+  case Device::CPU:
+    out = "Device::CPU";
+    break;
+  case Device::GPU:
+    out = "Device::GPU";
+    break;
+  default:
+    out = "Device::UNKNOWN";
+  }
+  return out;
+}
+
+std::string Str(FDDataType& fdt) {
+  std::string out;
+  switch (fdt) {
+  case FDDataType::BOOL:
+    out = "FDDataType::BOOL";
+    break;
+  case FDDataType::INT16:
+    out = "FDDataType::INT16";
+    break;
+  case FDDataType::INT32:
+    out = "FDDataType::INT32";
+    break;
+  case FDDataType::INT64:
+    out = "FDDataType::INT64";
+    break;
+  case FDDataType::FP32:
+    out = "FDDataType::FP32";
+    break;
+  case FDDataType::FP64:
+    out = "FDDataType::FP64";
+    break;
+  case FDDataType::FP16:
+    out = "FDDataType::FP16";
+    break;
+  case FDDataType::UINT8:
+    out = "FDDataType::UINT8";
+    break;
+  case FDDataType::INT8:
+    out = "FDDataType::INT8";
+    break;
+  default:
+    out = "FDDataType::UNKNOWN";
+  }
+  return out;
+}
+
+} // namespace fastdeploy
diff --git a/fastdeploy/core/fd_type.h b/fastdeploy/core/fd_type.h
new file mode 100644
index 0000000000..02e46551c3
--- /dev/null
+++ b/fastdeploy/core/fd_type.h
@@ -0,0 +1,59 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once + +#include +#include +#include + +#include "fastdeploy/core/config.h" +#include "fastdeploy/utils/utils.h" + +namespace fastdeploy { + +enum class Device { DEFAULT, CPU, GPU }; + +FASTDEPLOY_DECL std::string Str(Device& d); + +enum class FDDataType { + BOOL, + INT16, + INT32, + INT64, + FP16, + FP32, + FP64, + UNKNOWN1, + UNKNOWN2, + UNKNOWN3, + UNKNOWN4, + UNKNOWN5, + UNKNOWN6, + UNKNOWN7, + UNKNOWN8, + UNKNOWN9, + UNKNOWN10, + UNKNOWN11, + UNKNOWN12, + UNKNOWN13, + UINT8, + INT8 +}; + +FASTDEPLOY_DECL std::string Str(FDDataType& fdt); + +FASTDEPLOY_DECL int32_t FDDataTypeSize(FDDataType data_dtype); + +FASTDEPLOY_DECL std::string FDDataTypeStr(FDDataType data_dtype); +} // namespace fastdeploy diff --git a/fastdeploy/download.py b/fastdeploy/download.py deleted file mode 100644 index 3b5e92bf3c..0000000000 --- a/fastdeploy/download.py +++ /dev/null @@ -1,186 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import os.path as osp -import shutil -import requests -import time -import zipfile -import hashlib -import tqdm -import logging - -DOWNLOAD_RETRY_LIMIT = 3 - - -def md5check(fullname, md5sum=None): - if md5sum is None: - return True - - logging.info("File {} md5 checking...".format(fullname)) - md5 = hashlib.md5() - with open(fullname, 'rb') as f: - for chunk in iter(lambda: f.read(4096), b""): - md5.update(chunk) - calc_md5sum = md5.hexdigest() - - if calc_md5sum != md5sum: - logging.info("File {} md5 check failed, {}(calc) != " - "{}(base)".format(fullname, calc_md5sum, md5sum)) - return False - return True - - -def move_and_merge_tree(src, dst): - """ - Move src directory to dst, if dst is already exists, - merge src to dst - """ - if not osp.exists(dst): - shutil.move(src, dst) - else: - if not osp.isdir(src): - shutil.move(src, dst) - return - for fp in os.listdir(src): - src_fp = osp.join(src, fp) - dst_fp = osp.join(dst, fp) - if osp.isdir(src_fp): - if osp.isdir(dst_fp): - move_and_merge_tree(src_fp, dst_fp) - else: - shutil.move(src_fp, dst_fp) - elif osp.isfile(src_fp) and \ - not osp.isfile(dst_fp): - shutil.move(src_fp, dst_fp) - - -def download(url, path, rename=None, md5sum=None, show_progress=False): - """ - Download from url, save to path. - url (str): download url - path (str): download to given path - """ - if not osp.exists(path): - os.makedirs(path) - - fname = osp.split(url)[-1] - fullname = osp.join(path, fname) - if rename is not None: - fullname = osp.join(path, rename) - retry_cnt = 0 - while not (osp.exists(fullname) and md5check(fullname, md5sum)): - if retry_cnt < DOWNLOAD_RETRY_LIMIT: - retry_cnt += 1 - else: - logging.debug("{} download failed.".format(fname)) - raise RuntimeError("Download from {} failed. 
" - "Retry limit reached".format(url)) - - logging.info("Downloading {} from {}".format(fname, url)) - - req = requests.get(url, stream=True) - if req.status_code != 200: - raise RuntimeError("Downloading from {} failed with code " - "{}!".format(url, req.status_code)) - - # For protecting download interupted, download to - # tmp_fullname firstly, move tmp_fullname to fullname - # after download finished - tmp_fullname = fullname + "_tmp" - total_size = req.headers.get('content-length') - with open(tmp_fullname, 'wb') as f: - if total_size and show_progress: - for chunk in tqdm.tqdm( - req.iter_content(chunk_size=1024), - total=(int(total_size) + 1023) // 1024, - unit='KB'): - f.write(chunk) - else: - for chunk in req.iter_content(chunk_size=1024): - if chunk: - f.write(chunk) - shutil.move(tmp_fullname, fullname) - logging.debug("{} download completed.".format(fname)) - - return fullname - - -def decompress(fname): - """ - Decompress for zip and tar file - """ - logging.info("Decompressing {}...".format(fname)) - - # For protecting decompressing interupted, - # decompress to fpath_tmp directory firstly, if decompress - # successed, move decompress files to fpath and delete - # fpath_tmp and remove download compress file. - fpath = osp.split(fname)[0] - fpath_tmp = osp.join(fpath, 'tmp') - if osp.isdir(fpath_tmp): - shutil.rmtree(fpath_tmp) - os.makedirs(fpath_tmp) - - if fname.find('.tar') >= 0 or fname.find('.tgz') >= 0: - with tarfile.open(fname) as tf: - tf.extractall(path=fpath_tmp) - elif fname.find('.zip') >= 0: - with zipfile.ZipFile(fname) as zf: - zf.extractall(path=fpath_tmp) - else: - raise TypeError("Unsupport compress file type {}".format(fname)) - - for f in os.listdir(fpath_tmp): - src_dir = osp.join(fpath_tmp, f) - dst_dir = osp.join(fpath, f) - move_and_merge_tree(src_dir, dst_dir) - - shutil.rmtree(fpath_tmp) - logging.debug("{} decompressed.".format(fname)) - return dst_dir - - -def url2dir(url, path, rename=None): - full_name = download(url, path, rename, show_progress=True) - print("SDK is donwloaded, now extracting...") - if url.count(".tgz") > 0 or url.count(".tar") > 0 or url.count("zip") > 0: - return decompress(full_name) - - -def download_and_decompress(url, path='.', rename=None): - fname = osp.split(url)[-1] - fullname = osp.join(path, fname) - # if url.endswith(('tgz', 'tar.gz', 'tar', 'zip')): - # fullname = osp.join(path, fname.split('.')[0]) - nranks = 0 - if nranks <= 1: - dst_dir = url2dir(url, path, rename) - if dst_dir is not None: - fullname = dst_dir - else: - lock_path = fullname + '.lock' - if not os.path.exists(fullname): - with open(lock_path, 'w'): - os.utime(lock_path, None) - if local_rank == 0: - dst_dir = url2dir(url, path, rename) - if dst_dir is not None: - fullname = dst_dir - os.remove(lock_path) - else: - while os.path.exists(lock_path): - time.sleep(1) - return diff --git a/fastdeploy/fastdeploy_model.cc b/fastdeploy/fastdeploy_model.cc new file mode 100644 index 0000000000..f0a6fac711 --- /dev/null +++ b/fastdeploy/fastdeploy_model.cc @@ -0,0 +1,167 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "fastdeploy/fastdeploy_model.h"
+#include "fastdeploy/utils/utils.h"
+
+namespace fastdeploy {
+
+bool FastDeployModel::InitRuntime() {
+  FDASSERT(
+      ModelFormatCheck(runtime_option.model_file, runtime_option.model_format),
+      "ModelFormatCheck Failed.");
+  if (runtime_initialized_) {
+    FDERROR << "The model is already initialized, cannot be initialized again."
+            << std::endl;
+    return false;
+  }
+  if (runtime_option.backend != Backend::UNKNOWN) {
+    if (runtime_option.backend == Backend::ORT) {
+      if (!IsBackendAvailable(Backend::ORT)) {
+        FDERROR
+            << "Backend::ORT is not compiled into the current FastDeploy "
+               "library."
+            << std::endl;
+        return false;
+      }
+    } else if (runtime_option.backend == Backend::TRT) {
+      if (!IsBackendAvailable(Backend::TRT)) {
+        FDERROR
+            << "Backend::TRT is not compiled into the current FastDeploy "
+               "library."
+            << std::endl;
+        return false;
+      }
+    } else {
+      FDERROR << "Only Backend::ORT / Backend::TRT are supported now."
+              << std::endl;
+      return false;
+    }
+    runtime_ = new Runtime();
+    if (!runtime_->Init(runtime_option)) {
+      return false;
+    }
+    runtime_initialized_ = true;
+    return true;
+  }
+
+  if (runtime_option.device == Device::CPU) {
+    return CreateCpuBackend();
+  } else if (runtime_option.device == Device::GPU) {
+#ifdef WITH_GPU
+    return CreateGpuBackend();
+#else
+    FDERROR << "The compiled FastDeploy library doesn't support GPU now."
+            << std::endl;
+    return false;
+#endif
+  }
+  FDERROR << "Only CPU/GPU are supported now." << std::endl;
+  return false;
+}
+
+bool FastDeployModel::CreateCpuBackend() {
+  if (valid_cpu_backends.size() == 0) {
+    FDERROR << "There's no valid CPU backend for model: " << ModelName()
+            << std::endl;
+    return false;
+  }
+
+  for (auto& b : valid_cpu_backends) {
+    if (b == Backend::ORT) {
+      if (!IsBackendAvailable(Backend::ORT)) {
+        FDERROR << "OrtBackend is not compiled into the current FastDeploy "
+                   "library."
+                << std::endl;
+        continue;
+      }
+      runtime_option.backend = b;
+      runtime_ = new Runtime();
+      if (!runtime_->Init(runtime_option)) {
+        return false;
+      }
+      runtime_initialized_ = true;
+      return true;
+    } else {
+      FDERROR << "Only Backend::ORT is supported as a CPU backend now."
+              << std::endl;
+      return false;
+    }
+  }
+
+  FDERROR << "Cannot find an available CPU backend to load this model."
+          << std::endl;
+  return false;
+}
+
+bool FastDeployModel::CreateGpuBackend() {
+  if (valid_gpu_backends.size() == 0) {
+    FDERROR << "There's no valid GPU backend for model: " << ModelName()
+            << std::endl;
+    return false;
+  }
+
+  for (auto& b : valid_gpu_backends) {
+    if (b == Backend::ORT) {
+      if (!IsBackendAvailable(Backend::ORT)) {
+        FDERROR << "OrtBackend is not compiled into the current FastDeploy "
+                   "library."
+                << std::endl;
+        continue;
+      }
+      runtime_option.backend = b;
+      runtime_ = new Runtime();
+      if (!runtime_->Init(runtime_option)) {
+        return false;
+      }
+      runtime_initialized_ = true;
+      return true;
+    } else if (b == Backend::TRT) {
+      if (!IsBackendAvailable(Backend::TRT)) {
+        FDERROR << "TrtBackend is not compiled into the current FastDeploy "
+                   "library."
+                << std::endl;
+        continue;
+      }
+      runtime_option.backend = b;
+      runtime_ = new Runtime();
+      if (!runtime_->Init(runtime_option)) {
+        return false;
+      }
+      // Keep state consistent with the ORT branch above.
+      runtime_initialized_ = true;
+      return true;
+    } else {
+      FDERROR << "Only Backend::ORT / Backend::TRT are supported as GPU "
+                 "backends now."
+              << std::endl;
+      return false;
+    }
+  }
+
+  FDERROR << "Cannot find an available GPU backend to load this model."
+          << std::endl;
+  return false;
+}
+
+bool FastDeployModel::Infer(std::vector<FDTensor>& input_tensors,
+                            std::vector<FDTensor>* output_tensors) {
+  return runtime_->Infer(input_tensors, output_tensors);
+}
+
+void FastDeployModel::EnableDebug() {
+#ifdef FASTDEPLOY_DEBUG
+  debug_ = true;
+#else
+  FDLogger() << "FastDeploy was not compiled with -DENABLE_DEBUG=ON, so "
+                "debug mode cannot be enabled."
+             << std::endl;
+  debug_ = false;
+#endif
+}
+
+bool FastDeployModel::DebugEnabled() { return debug_; }
+
+} // namespace fastdeploy
diff --git a/fastdeploy/fastdeploy_model.h b/fastdeploy/fastdeploy_model.h
new file mode 100644
index 0000000000..070a905f41
--- /dev/null
+++ b/fastdeploy/fastdeploy_model.h
@@ -0,0 +1,67 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+#include "fastdeploy/fastdeploy_runtime.h"
+
+namespace fastdeploy {
+
+class FASTDEPLOY_DECL FastDeployModel {
+ public:
+  virtual std::string ModelName() const { return "NameUndefined"; }
+
+  virtual bool InitRuntime();
+  virtual bool CreateCpuBackend();
+  virtual bool CreateGpuBackend();
+  virtual bool Infer(std::vector<FDTensor>& input_tensors,
+                     std::vector<FDTensor>* output_tensors);
+
+  RuntimeOption runtime_option;
+  std::vector<Backend> valid_cpu_backends = {Backend::ORT};
+  std::vector<Backend> valid_gpu_backends = {Backend::ORT};
+  std::vector<Backend> valid_external_backends;
+  bool initialized = false;
+  virtual int NumInputsOfRuntime() { return runtime_->NumInputs(); }
+  virtual int NumOutputsOfRuntime() { return runtime_->NumOutputs(); }
+  virtual TensorInfo InputInfoOfRuntime(int index) {
+    return runtime_->GetInputInfo(index);
+  }
+  virtual TensorInfo OutputInfoOfRuntime(int index) {
+    return runtime_->GetOutputInfo(index);
+  }
+  virtual bool Initialized() const {
+    return runtime_initialized_ && initialized;
+  }
+
+  virtual void EnableDebug();
+  virtual bool DebugEnabled();
+
+ private:
+  Runtime* runtime_ = nullptr;
+  bool runtime_initialized_ = false;
+  bool debug_ = false;
+};
+
+#define TIMERECORD_START(id) \
+  TimeCounter tc_##id;       \
+  tc_##id.Start();
+
+#define TIMERECORD_END(id, prefix)                                         \
+  if (DebugEnabled()) {                                                    \
+    tc_##id.End();                                                         \
+    FDLogger() << __FILE__ << "(" << __LINE__ << "):" << __FUNCTION__      \
+               << " " << prefix << " duration = " << tc_##id.Duration()    \
+               << "s." << std::endl;                                       \
+  }
+
+} // namespace fastdeploy
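The TIMERECORD_* pair is meant to bracket a code section; the elapsed time is only logged when DebugEnabled() is true. A sketch of intended use inside a model method (the section name is illustrative):

TIMERECORD_START(preprocess)
// ... preprocessing work ...
TIMERECORD_END(preprocess, "Preprocess")  // logs the duration in debug builds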
diff --git a/fastdeploy/fastdeploy_runtime.cc b/fastdeploy/fastdeploy_runtime.cc
new file mode 100644
index 0000000000..b053db586f
--- /dev/null
+++ b/fastdeploy/fastdeploy_runtime.cc
@@ -0,0 +1,163 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/fastdeploy_runtime.h"
+#include "fastdeploy/utils/utils.h"
+#ifdef ENABLE_ORT_BACKEND
+#include "fastdeploy/backends/ort/ort_backend.h"
+#endif
+
+#ifdef ENABLE_TRT_BACKEND
+#include "fastdeploy/backends/tensorrt/trt_backend.h"
+#endif
+
+namespace fastdeploy {
+
+std::vector<Backend> GetAvailableBackends() {
+  std::vector<Backend> backends;
+#ifdef ENABLE_ORT_BACKEND
+  backends.push_back(Backend::ORT);
+#endif
+#ifdef ENABLE_TRT_BACKEND
+  backends.push_back(Backend::TRT);
+#endif
+  return backends;
+}
+
+bool IsBackendAvailable(const Backend& backend) {
+  std::vector<Backend> backends = GetAvailableBackends();
+  for (size_t i = 0; i < backends.size(); ++i) {
+    if (backend == backends[i]) {
+      return true;
+    }
+  }
+  return false;
+}
+
+bool ModelFormatCheck(const std::string& model_file,
+                      const Frontend& model_format) {
+  if (model_format == Frontend::PADDLE) {
+    if (model_file.size() < 8 ||
+        model_file.substr(model_file.size() - 8, 8) != ".pdmodel") {
+      FDLogger() << "With model format of Frontend::PADDLE, the model file "
+                    "should end with `.pdmodel`, but now it's "
+                 << model_file << std::endl;
+      return false;
+    }
+  } else if (model_format == Frontend::ONNX) {
+    if (model_file.size() < 5 ||
+        model_file.substr(model_file.size() - 5, 5) != ".onnx") {
+      FDLogger() << "With model format of Frontend::ONNX, the model file "
+                    "should end with `.onnx`, but now it's "
+                 << model_file << std::endl;
+      return false;
+    }
+  } else {
+    FDLogger() << "Only model formats Frontend::PADDLE / Frontend::ONNX are "
+                  "supported."
+               << std::endl;
+    return false;
+  }
+  return true;
+}
+
+bool Runtime::Init(const RuntimeOption& _option) {
+  option = _option;
+  if (option.backend == Backend::ORT) {
+    CreateOrtBackend();
+  } else if (option.backend == Backend::TRT) {
+    CreateTrtBackend();
+  } else {
+    FDERROR << "Runtime only supports Backend::ORT / Backend::TRT now."
+            << std::endl;
+    return false;
+  }
+  return true;
+}
+
+TensorInfo Runtime::GetInputInfo(int index) {
+  return backend_->GetInputInfo(index);
+}
+
+TensorInfo Runtime::GetOutputInfo(int index) {
+  return backend_->GetOutputInfo(index);
+}
+
+bool Runtime::Infer(std::vector<FDTensor>& input_tensors,
+                    std::vector<FDTensor>* output_tensors) {
+  return backend_->Infer(input_tensors, output_tensors);
+}
+
+void Runtime::CreateOrtBackend() {
+#ifdef ENABLE_ORT_BACKEND
+  auto ort_option = OrtBackendOption();
+  ort_option.graph_optimization_level = option.ort_graph_opt_level;
+  ort_option.intra_op_num_threads = option.cpu_thread_num;
+  ort_option.inter_op_num_threads = option.ort_inter_op_num_threads;
+  ort_option.execution_mode = option.ort_execution_mode;
+  ort_option.use_gpu = (option.device == Device::GPU);
true : false;
+  ort_option.gpu_id = option.device_id;
+  FDASSERT(option.model_format == Frontend::PADDLE ||
+               option.model_format == Frontend::ONNX,
+           "OrtBackend only supports model format of Frontend::PADDLE / "
+           "Frontend::ONNX.");
+  backend_ = new OrtBackend();
+  auto casted_backend = dynamic_cast<OrtBackend*>(backend_);
+  if (option.model_format == Frontend::ONNX) {
+    FDASSERT(casted_backend->InitFromOnnx(option.model_file, ort_option),
+             "Load model from ONNX failed while initializing OrtBackend.");
+  } else {
+    FDASSERT(casted_backend->InitFromPaddle(option.model_file,
+                                            option.params_file, ort_option),
+             "Load model from Paddle failed while initializing OrtBackend.");
+  }
+#else
+  FDASSERT(false, "OrtBackend is not available, please compile with "
+                  "ENABLE_ORT_BACKEND=ON.");
+#endif
+}
+
+void Runtime::CreateTrtBackend() {
+#ifdef ENABLE_TRT_BACKEND
+  auto trt_option = TrtBackendOption();
+  trt_option.gpu_id = option.device_id;
+  trt_option.enable_fp16 = option.trt_enable_fp16;
+  trt_option.enable_int8 = option.trt_enable_int8;
+  trt_option.max_batch_size = option.trt_max_batch_size;
+  trt_option.max_workspace_size = option.trt_max_workspace_size;
+  trt_option.fixed_shape = option.trt_fixed_shape;
+  trt_option.max_shape = option.trt_max_shape;
+  trt_option.min_shape = option.trt_min_shape;
+  trt_option.opt_shape = option.trt_opt_shape;
+  trt_option.serialize_file = option.trt_serialize_file;
+  FDASSERT(option.model_format == Frontend::PADDLE ||
+               option.model_format == Frontend::ONNX,
+           "TrtBackend only supports model format of Frontend::PADDLE / "
+           "Frontend::ONNX.");
+  backend_ = new TrtBackend();
+  auto casted_backend = dynamic_cast<TrtBackend*>(backend_);
+  if (option.model_format == Frontend::ONNX) {
+    FDASSERT(casted_backend->InitFromOnnx(option.model_file, trt_option),
+             "Load model from ONNX failed while initializing TrtBackend.");
+  } else {
+    FDASSERT(casted_backend->InitFromPaddle(option.model_file,
+                                            option.params_file, trt_option),
+             "Load model from Paddle failed while initializing TrtBackend.");
+  }
+#else
+  FDASSERT(false, "TrtBackend is not available, please compile with "
+                  "ENABLE_TRT_BACKEND=ON.");
+#endif
+}
+}  // namespace fastdeploy
diff --git a/fastdeploy/fastdeploy_runtime.h b/fastdeploy/fastdeploy_runtime.h
new file mode 100644
index 0000000000..a0f67486d6
--- /dev/null
+++ b/fastdeploy/fastdeploy_runtime.h
@@ -0,0 +1,94 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
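A reviewer-note sketch of how the `Runtime` API above is meant to be driven end to end, including the TensorRT dynamic-shape options carried by `RuntimeOption` (the model path, the input tensor name "x", and the engine cache path are hypothetical, not part of this patch):

```cpp
#include "fastdeploy/fastdeploy_runtime.h"

int main() {
  fastdeploy::RuntimeOption option;
  option.model_file = "model.onnx";  // hypothetical path
  option.model_format = fastdeploy::Frontend::ONNX;
  // Prefer TensorRT when compiled in, otherwise fall back to ONNX Runtime.
  option.backend = fastdeploy::IsBackendAvailable(fastdeploy::Backend::TRT)
                       ? fastdeploy::Backend::TRT
                       : fastdeploy::Backend::ORT;
  // TRT only: NCHW bounds for a hypothetical input "x"; the engine accepts
  // batch sizes 1..32 and is tuned for batch 4.
  option.trt_min_shape["x"] = {1, 3, 224, 224};
  option.trt_opt_shape["x"] = {4, 3, 224, 224};
  option.trt_max_shape["x"] = {32, 3, 224, 224};
  option.trt_serialize_file = "model.trt";  // cache the built engine

  fastdeploy::Runtime runtime;
  if (!runtime.Init(option)) {
    return 1;
  }
  // Inputs must be named and shaped to match runtime.GetInputInfo(i).
  std::vector<fastdeploy::FDTensor> inputs(runtime.NumInputs());
  std::vector<fastdeploy::FDTensor> outputs(runtime.NumOutputs());
  runtime.Infer(inputs, &outputs);
  return 0;
}
```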
+#pragma once
+
+#include "fastdeploy/backends/backend.h"
+#include "fastdeploy/utils/perf.h"
+#include <map>
+#include <vector>
+
+namespace fastdeploy {
+
+enum class Backend { UNKNOWN, ORT, TRT, PDRT };
+enum class Frontend { PADDLE, ONNX };
+
+FASTDEPLOY_DECL std::vector<Backend> GetAvailableBackends();
+
+FASTDEPLOY_DECL bool IsBackendAvailable(const Backend& backend);
+
+bool ModelFormatCheck(const std::string& model_file,
+                      const Frontend& model_format);
+
+struct FASTDEPLOY_DECL RuntimeOption {
+  Backend backend = Backend::ORT;
+
+  // for cpu inference and preprocess
+  int cpu_thread_num = 8;
+  int device_id = 0;
+
+#ifdef WITH_GPU
+  Device device = Device::GPU;
+#else
+  Device device = Device::CPU;
+#endif
+
+  // ======Only for ORT Backend========
+  // -1 means use default value by ort
+  // 0: ORT_DISABLE_ALL 1: ORT_ENABLE_BASIC 2: ORT_ENABLE_EXTENDED 3:
+  // ORT_ENABLE_ALL
+  int ort_graph_opt_level = -1;
+  int ort_inter_op_num_threads = -1;
+  // 0: ORT_SEQUENTIAL 1: ORT_PARALLEL
+  int ort_execution_mode = -1;
+
+  // ======Only for Trt Backend=======
+  std::map<std::string, std::vector<int32_t>> trt_fixed_shape;
+  std::map<std::string, std::vector<int32_t>> trt_max_shape;
+  std::map<std::string, std::vector<int32_t>> trt_min_shape;
+  std::map<std::string, std::vector<int32_t>> trt_opt_shape;
+  std::string trt_serialize_file = "";
+  bool trt_enable_fp16 = false;
+  bool trt_enable_int8 = false;
+  size_t trt_max_batch_size = 32;
+  size_t trt_max_workspace_size = 1 << 30;
+
+  std::string model_file = "";   // Path of model file
+  std::string params_file = "";  // Path of parameters file, can be empty
+  Frontend model_format = Frontend::PADDLE;  // format of input model
+};
+
+struct FASTDEPLOY_DECL Runtime {
+ public:
+  // explicit Runtime(const RuntimeOption& _option = RuntimeOption());
+
+  bool Init(const RuntimeOption& _option);
+
+  bool Infer(std::vector<FDTensor>& input_tensors,
+             std::vector<FDTensor>* output_tensors);
+
+  void CreateOrtBackend();
+
+  void CreateTrtBackend();
+
+  int NumInputs() { return backend_->NumInputs(); }
+  int NumOutputs() { return backend_->NumOutputs(); }
+  TensorInfo GetInputInfo(int index);
+  TensorInfo GetOutputInfo(int index);
+
+  RuntimeOption option;
+
+ private:
+  BaseBackend* backend_;
+};
+}  // namespace fastdeploy
diff --git a/fastdeploy/fastdeploy_runtime.py b/fastdeploy/fastdeploy_runtime.py
new file mode 100644
index 0000000000..3eef861f2f
--- /dev/null
+++ b/fastdeploy/fastdeploy_runtime.py
@@ -0,0 +1,55 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import absolute_import
+import logging
+from . 
import fastdeploy_main as C
+
+
+class FastDeployModel:
+    def __init__(self, option):
+        self._model = None
+        self._runtime_option = option
+        if self._runtime_option is None:
+            self._runtime_option = C.RuntimeOption()
+
+    def model_name(self):
+        return self._model.model_name()
+
+    def num_inputs(self):
+        return self._model.num_inputs()
+
+    def num_outputs(self):
+        return self._model.num_outputs()
+
+    def get_input_info(self, index):
+        assert index < self.num_inputs(
+        ), "The index:{} must be less than number of inputs:{}.".format(
+            index, self.num_inputs())
+        return self._model.get_input_info(index)
+
+    def get_output_info(self, index):
+        assert index < self.num_outputs(
+        ), "The index:{} must be less than number of outputs:{}.".format(
+            index, self.num_outputs())
+        return self._model.get_output_info(index)
+
+    @property
+    def runtime_option(self):
+        return self._model.runtime_option if self._model is not None else None
+
+    @property
+    def initialized(self):
+        if self._model is None:
+            return False
+        return self._model.initialized()
diff --git a/fastdeploy/libs/__init__.py b/fastdeploy/libs/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/fastdeploy/pybind/fastdeploy_model.cc b/fastdeploy/pybind/fastdeploy_model.cc
new file mode 100644
index 0000000000..3693bfa4d7
--- /dev/null
+++ b/fastdeploy/pybind/fastdeploy_model.cc
@@ -0,0 +1,34 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/pybind/main.h"
+
+namespace fastdeploy {
+
+void BindFDModel(pybind11::module& m) {
+  pybind11::class_<FastDeployModel>(m, "FastDeployModel")
+      .def(pybind11::init<>(), "Default Constructor")
+      .def("model_name", &FastDeployModel::ModelName)
+      .def("num_inputs_of_runtime", &FastDeployModel::NumInputsOfRuntime)
+      .def("num_outputs_of_runtime", &FastDeployModel::NumOutputsOfRuntime)
+      .def("input_info_of_runtime", &FastDeployModel::InputInfoOfRuntime)
+      .def("output_info_of_runtime", &FastDeployModel::OutputInfoOfRuntime)
+      .def("initialized", &FastDeployModel::Initialized)
+      .def_readwrite("runtime_option", &FastDeployModel::runtime_option)
+      .def_readwrite("valid_cpu_backends", &FastDeployModel::valid_cpu_backends)
+      .def_readwrite("valid_gpu_backends",
+                     &FastDeployModel::valid_gpu_backends);
+}
+
+}  // namespace fastdeploy
diff --git a/fastdeploy/pybind/fastdeploy_runtime.cc b/fastdeploy/pybind/fastdeploy_runtime.cc
new file mode 100644
index 0000000000..bbd257f761
--- /dev/null
+++ b/fastdeploy/pybind/fastdeploy_runtime.cc
@@ -0,0 +1,114 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/pybind/main.h"
+
+namespace fastdeploy {
+
+void BindRuntime(pybind11::module& m) {
+  pybind11::class_<RuntimeOption>(m, "RuntimeOption")
+      .def(pybind11::init())
+      .def_readwrite("model_file", &RuntimeOption::model_file)
+      .def_readwrite("params_file", &RuntimeOption::params_file)
+      .def_readwrite("model_format", &RuntimeOption::model_format)
+      .def_readwrite("backend", &RuntimeOption::backend)
+      .def_readwrite("cpu_thread_num", &RuntimeOption::cpu_thread_num)
+      .def_readwrite("device_id", &RuntimeOption::device_id)
+      .def_readwrite("device", &RuntimeOption::device)
+      .def_readwrite("ort_graph_opt_level", &RuntimeOption::ort_graph_opt_level)
+      .def_readwrite("ort_inter_op_num_threads",
+                     &RuntimeOption::ort_inter_op_num_threads)
+      .def_readwrite("ort_execution_mode", &RuntimeOption::ort_execution_mode)
+      .def_readwrite("trt_fixed_shape", &RuntimeOption::trt_fixed_shape)
+      .def_readwrite("trt_max_shape", &RuntimeOption::trt_max_shape)
+      .def_readwrite("trt_opt_shape", &RuntimeOption::trt_opt_shape)
+      .def_readwrite("trt_min_shape", &RuntimeOption::trt_min_shape)
+      .def_readwrite("trt_serialize_file", &RuntimeOption::trt_serialize_file)
+      .def_readwrite("trt_enable_fp16", &RuntimeOption::trt_enable_fp16)
+      .def_readwrite("trt_enable_int8", &RuntimeOption::trt_enable_int8)
+      .def_readwrite("trt_max_batch_size", &RuntimeOption::trt_max_batch_size)
+      .def_readwrite("trt_max_workspace_size",
+                     &RuntimeOption::trt_max_workspace_size);
+  pybind11::class_<Runtime>(m, "Runtime")
+      .def(pybind11::init([](RuntimeOption& option) {
+        Runtime* runtime = new Runtime();
+        runtime->Init(option);
+        return runtime;
+      }))
+      .def("infer", [](Runtime& self,
+                       std::map<std::string, pybind11::array>& data) {
+        std::vector<FDTensor> inputs(data.size());
+        int index = 0;
+        for (auto iter = data.begin(); iter != data.end(); ++iter) {
+          inputs[index].dtype = NumpyDataTypeToFDDataType(iter->second.dtype());
+          inputs[index].shape.insert(
+              inputs[index].shape.begin(), iter->second.shape(),
+              iter->second.shape() + iter->second.ndim());
+          // TODO(jiangjiajun) Maybe skip memory copy is a better choice
+          // use SetExternalData
+          inputs[index].data.resize(iter->second.nbytes());
+          memcpy(inputs[index].data.data(), iter->second.mutable_data(),
+                 iter->second.nbytes());
+          inputs[index].name = iter->first;
+          index += 1;
+        }
+
+        std::vector<FDTensor> outputs(self.NumOutputs());
+        self.Infer(inputs, &outputs);
+
+        std::vector<pybind11::array> results;
+        results.reserve(outputs.size());
+        for (size_t i = 0; i < outputs.size(); ++i) {
+          auto numpy_dtype = FDDataTypeToNumpyDataType(outputs[i].dtype);
+          results.emplace_back(pybind11::array(numpy_dtype, outputs[i].shape));
+          memcpy(results[i].mutable_data(), outputs[i].data.data(),
+                 outputs[i].Numel() * FDDataTypeSize(outputs[i].dtype));
+        }
+        return results;
+      });
+
+  pybind11::enum_<Backend>(m, "Backend", pybind11::arithmetic(),
+                           "Backend for inference.")
+      .value("UNKNOWN", Backend::UNKNOWN)
+      .value("ORT", Backend::ORT)
+      .value("TRT", Backend::TRT)
+      .value("PDRT", Backend::PDRT);
+  pybind11::enum_<Frontend>(m, "Frontend", pybind11::arithmetic(),
+                            "Frontend for inference.")
+      .value("PADDLE", Frontend::PADDLE)
+      .value("ONNX", Frontend::ONNX);
+  
pybind11::enum_<Device>(m, "Device", pybind11::arithmetic(),
+                          "Device for inference.")
+      .value("CPU", Device::CPU)
+      .value("GPU", Device::GPU);
+
+  pybind11::enum_<FDDataType>(m, "FDDataType", pybind11::arithmetic(),
+                              "Data type of FastDeploy.")
+      .value("BOOL", FDDataType::BOOL)
+      .value("INT8", FDDataType::INT8)
+      .value("INT16", FDDataType::INT16)
+      .value("INT32", FDDataType::INT32)
+      .value("INT64", FDDataType::INT64)
+      .value("FP32", FDDataType::FP32)
+      .value("FP64", FDDataType::FP64)
+      .value("UINT8", FDDataType::UINT8);
+
+  pybind11::class_<TensorInfo>(m, "TensorInfo")
+      .def_readwrite("name", &TensorInfo::name)
+      .def_readwrite("shape", &TensorInfo::shape)
+      .def_readwrite("dtype", &TensorInfo::dtype);
+
+  m.def("get_available_backends", []() { return GetAvailableBackends(); });
+}
+
+}  // namespace fastdeploy
diff --git a/fastdeploy/pybind/main.cc b/fastdeploy/pybind/main.cc
new file mode 100644
index 0000000000..86467215e2
--- /dev/null
+++ b/fastdeploy/pybind/main.cc
@@ -0,0 +1,115 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/pybind/main.h"
+
+namespace fastdeploy {
+
+void BindRuntime(pybind11::module&);
+void BindFDModel(pybind11::module&);
+void BindVision(pybind11::module&);
+
+pybind11::dtype FDDataTypeToNumpyDataType(const FDDataType& fd_dtype) {
+  pybind11::dtype dt;
+  if (fd_dtype == FDDataType::INT32) {
+    dt = pybind11::dtype::of<int32_t>();
+  } else if (fd_dtype == FDDataType::INT64) {
+    dt = pybind11::dtype::of<int64_t>();
+  } else if (fd_dtype == FDDataType::FP32) {
+    dt = pybind11::dtype::of<float>();
+  } else if (fd_dtype == FDDataType::FP64) {
+    dt = pybind11::dtype::of<double>();
+  } else {
+    FDASSERT(false, "The function doesn't support data type of " +
+                        FDDataTypeStr(fd_dtype) + ".");
+  }
+  return dt;
+}
+
+FDDataType NumpyDataTypeToFDDataType(const pybind11::dtype& np_dtype) {
+  if (np_dtype.is(pybind11::dtype::of<int32_t>())) {
+    return FDDataType::INT32;
+  } else if (np_dtype.is(pybind11::dtype::of<int64_t>())) {
+    return FDDataType::INT64;
+  } else if (np_dtype.is(pybind11::dtype::of<float>())) {
+    return FDDataType::FP32;
+  } else if (np_dtype.is(pybind11::dtype::of<double>())) {
+    return FDDataType::FP64;
+  }
+  FDASSERT(false, "NumpyDataTypeToFDDataType() only support "
+                  "int32/int64/float32/float64 now.");
+  return FDDataType::FP32;
+}
+
+void PyArrayToTensor(pybind11::array& pyarray, FDTensor* tensor,
+                     bool share_buffer) {
+  tensor->dtype = NumpyDataTypeToFDDataType(pyarray.dtype());
+  tensor->shape.insert(tensor->shape.begin(), pyarray.shape(),
+                       pyarray.shape() + pyarray.ndim());
+  if (share_buffer) {
+    tensor->external_data_ptr = pyarray.mutable_data();
+  } else {
+    tensor->data.resize(pyarray.nbytes());
+    memcpy(tensor->data.data(), pyarray.mutable_data(), pyarray.nbytes());
+  }
+}
+
+#ifdef ENABLE_VISION
+int NumpyDataTypeToOpenCvType(const pybind11::dtype& np_dtype) {
+  if (np_dtype.is(pybind11::dtype::of<int32_t>())) {
+    return CV_32S;
+  } else if (np_dtype.is(pybind11::dtype::of<int8_t>())) {
+    return CV_8U;
+  } else if 
(np_dtype.is(pybind11::dtype::of<uint8_t>())) {
+    return CV_8U;
+  } else if (np_dtype.is(pybind11::dtype::of<float>())) {
+    return CV_32F;
+  } else {
+    FDASSERT(
+        false,
+        "NumpyDataTypeToOpenCvType() only support int32/int8/uint8/float32 "
+        "now.");
+  }
+  return CV_8U;
+}
+
+cv::Mat PyArrayToCvMat(pybind11::array& pyarray) {
+  auto cv_type = NumpyDataTypeToOpenCvType(pyarray.dtype());
+  FDASSERT(
+      pyarray.ndim() == 3,
+      "Require rank of array to be 3 with HWC format while converting it to "
+      "cv::Mat.");
+  int channel = *(pyarray.shape() + 2);
+  int height = *(pyarray.shape());
+  int width = *(pyarray.shape() + 1);
+  return cv::Mat(height, width, CV_MAKETYPE(cv_type, channel),
+                 pyarray.mutable_data());
+}
+#endif
+
+PYBIND11_MODULE(fastdeploy_main, m) {
+  m.doc() =
+      "Make it easier for programmers to deploy deep learning models, save "
+      "time to save the world!";
+
+  BindRuntime(m);
+  BindFDModel(m);
+#ifdef ENABLE_VISION
+  auto vision_module =
+      m.def_submodule("vision", "Vision module of FastDeploy.");
+  BindVision(vision_module);
+#endif
+}
+
+}  // namespace fastdeploy
diff --git a/fastdeploy/pybind/main.h b/fastdeploy/pybind/main.h
new file mode 100644
index 0000000000..122cb7c2b1
--- /dev/null
+++ b/fastdeploy/pybind/main.h
@@ -0,0 +1,88 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
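A note on `PyArrayToCvMat` above: it indexes the raw dims pointer returned by `shape()`. An equivalent sketch using pybind11's per-dimension accessor, for reference (the helper name is ours, not part of the patch):

```cpp
#include <pybind11/numpy.h>

// Mirrors the HWC unpacking in PyArrayToCvMat: for a rank-3 array,
// shape(0) = height, shape(1) = width, shape(2) = channels.
void UnpackHwc(const pybind11::array& a, int* h, int* w, int* c) {
  *h = static_cast<int>(a.shape(0));
  *w = static_cast<int>(a.shape(1));
  *c = static_cast<int>(a.shape(2));
}
```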
+
+#pragma once
+
+#include <pybind11/numpy.h>
+#include <pybind11/pybind11.h>
+#include <pybind11/stl.h>
+#include <type_traits>
+
+#include "fastdeploy/fastdeploy_runtime.h"
+
+#ifdef ENABLE_VISION
+#include "fastdeploy/vision.h"
+#endif
+
+namespace fastdeploy {
+
+void BindBackend(pybind11::module&);
+void BindVision(pybind11::module&);
+
+pybind11::dtype FDDataTypeToNumpyDataType(const FDDataType& fd_dtype);
+
+FDDataType NumpyDataTypeToFDDataType(const pybind11::dtype& np_dtype);
+
+void PyArrayToTensor(pybind11::array& pyarray, FDTensor* tensor,
+                     bool share_buffer = false);
+
+#ifdef ENABLE_VISION
+cv::Mat PyArrayToCvMat(pybind11::array& pyarray);
+#endif
+
+template <typename T>
+FDDataType CTypeToFDDataType() {
+  if (std::is_same<T, int32_t>::value) {
+    return FDDataType::INT32;
+  } else if (std::is_same<T, int64_t>::value) {
+    return FDDataType::INT64;
+  } else if (std::is_same<T, float>::value) {
+    return FDDataType::FP32;
+  } else if (std::is_same<T, double>::value) {
+    return FDDataType::FP64;
+  }
+  FDASSERT(false,
+           "CTypeToFDDataType only support int32/int64/float32/float64 now.");
+  return FDDataType::FP32;
+}
+
+template <typename T>
+std::vector<pybind11::array>
+PyBackendInfer(T& self, const std::vector<std::string>& names,
+               std::vector<pybind11::array>& data) {
+  std::vector<FDTensor> inputs(data.size());
+  for (size_t i = 0; i < data.size(); ++i) {
+    // TODO(jiangjiajun) here is considered to use user memory directly
+    inputs[i].dtype = NumpyDataTypeToFDDataType(data[i].dtype());
+    inputs[i].shape.insert(inputs[i].shape.begin(), data[i].shape(),
+                           data[i].shape() + data[i].ndim());
+    inputs[i].data.resize(data[i].nbytes());
+    memcpy(inputs[i].data.data(), data[i].mutable_data(), data[i].nbytes());
+    inputs[i].name = names[i];
+  }
+
+  std::vector<FDTensor> outputs(self.NumOutputs());
+  self.Infer(inputs, &outputs);
+
+  std::vector<pybind11::array> results;
+  results.reserve(outputs.size());
+  for (size_t i = 0; i < outputs.size(); ++i) {
+    auto numpy_dtype = FDDataTypeToNumpyDataType(outputs[i].dtype);
+    results.emplace_back(pybind11::array(numpy_dtype, outputs[i].shape));
+    memcpy(results[i].mutable_data(), outputs[i].data.data(),
+           outputs[i].Numel() * FDDataTypeSize(outputs[i].dtype));
+  }
+  return results;
+}
+
+}  // namespace fastdeploy
diff --git a/fastdeploy/utils/perf.h b/fastdeploy/utils/perf.h
new file mode 100644
index 0000000000..9f451c3a90
--- /dev/null
+++ b/fastdeploy/utils/perf.h
@@ -0,0 +1,49 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
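The `memcpy` sizes in `PyBackendInfer` above rely on a simple invariant: a tensor's byte size equals its element count times the element size. Stated as a small self-contained sketch (the helper name is ours):

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

// bytes copied == element count (Numel) * bytes per element
// (FDDataTypeSize). E.g. a float32 tensor of shape {1, 3, 224, 224}
// has 150528 elements and therefore 602112 bytes.
size_t TensorNbytes(const std::vector<int64_t>& shape, size_t dtype_size) {
  size_t numel = 1;
  for (int64_t d : shape) numel *= static_cast<size_t>(d);
  return numel * dtype_size;
}
```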
+
+#pragma once
+
+#include "fastdeploy/utils/utils.h"
+#include <chrono>  // NOLINT
+
+namespace fastdeploy {
+
+class FASTDEPLOY_DECL TimeCounter {
+ public:
+  void Start() { begin_ = std::chrono::system_clock::now(); }
+
+  void End() { end_ = std::chrono::system_clock::now(); }
+
+  double Duration() {
+    auto duration =
+        std::chrono::duration_cast<std::chrono::microseconds>(end_ - begin_);
+    return static_cast<double>(duration.count()) *
+           std::chrono::microseconds::period::num /
+           std::chrono::microseconds::period::den;
+  }
+
+  void PrintInfo(const std::string& prefix = "TimeCounter: ",
+                 bool print_out = true) {
+    if (!print_out) {
+      return;
+    }
+    FDLogger() << prefix << " duration = " << Duration() << "s." << std::endl;
+  }
+
+ private:
+  std::chrono::time_point<std::chrono::system_clock> begin_;
+  std::chrono::time_point<std::chrono::system_clock> end_;
+};
+
+}  // namespace fastdeploy
diff --git a/fastdeploy/utils/utils.cc b/fastdeploy/utils/utils.cc
new file mode 100644
index 0000000000..e4e5d1472d
--- /dev/null
+++ b/fastdeploy/utils/utils.cc
@@ -0,0 +1,34 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/utils/utils.h"
+
+namespace fastdeploy {
+
+FDLogger::FDLogger(bool verbose, const std::string& prefix) {
+  verbose_ = verbose;
+  line_ = "";
+  prefix_ = prefix;
+}
+
+FDLogger& FDLogger::operator<<(std::ostream& (*os)(std::ostream&)) {
+  if (!verbose_) {
+    return *this;
+  }
+  std::cout << prefix_ << " " << line_ << std::endl;
+  line_ = "";
+  return *this;
+}
+
+}  // namespace fastdeploy
diff --git a/fastdeploy/utils/utils.h b/fastdeploy/utils/utils.h
new file mode 100644
index 0000000000..1b9f625b5e
--- /dev/null
+++ b/fastdeploy/utils/utils.h
@@ -0,0 +1,77 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
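A minimal usage sketch for `TimeCounter` above (the `TIMERECORD_*` macros in fastdeploy_model.h are built on the same class); the phase label is illustrative:

```cpp
#include "fastdeploy/utils/perf.h"

void TimedWork() {
  fastdeploy::TimeCounter tc;
  tc.Start();
  // ... the work being measured ...
  tc.End();
  // Prints e.g. "[Preprocess]  duration = 0.0042s." through FDLogger.
  tc.PrintInfo("[Preprocess]");
}
```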
+
+#pragma once
+
+#include <stdlib.h>
+
+#include <iostream>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#if defined(_WIN32)
+#ifdef FASTDEPLOY_LIB
+#define FASTDEPLOY_DECL __declspec(dllexport)
+#else
+#define FASTDEPLOY_DECL __declspec(dllimport)
+#endif  // FASTDEPLOY_LIB
+#else
+#define FASTDEPLOY_DECL __attribute__((visibility("default")))
+#endif  // _WIN32
+
+namespace fastdeploy {
+
+class FASTDEPLOY_DECL FDLogger {
+ public:
+  FDLogger() {
+    line_ = "";
+    prefix_ = "[FastDeploy]";
+    verbose_ = true;
+  }
+  explicit FDLogger(bool verbose, const std::string& prefix = "[FastDeploy]");
+
+  template <typename T>
+  FDLogger& operator<<(const T& val) {
+    if (!verbose_) {
+      return *this;
+    }
+    std::stringstream ss;
+    ss << val;
+    line_ += ss.str();
+    return *this;
+  }
+  FDLogger& operator<<(std::ostream& (*os)(std::ostream&));
+  ~FDLogger() {
+    if (!verbose_ && line_ != "") {
+      std::cout << line_ << std::endl;
+    }
+  }
+
+ private:
+  std::string line_;
+  std::string prefix_;
+  bool verbose_ = true;
+};
+
+#define FDERROR                                                            \
+  FDLogger(true, "[ERROR]")                                                \
+      << __REL_FILE__ << "(" << __LINE__ << ")::" << __FUNCTION__ << "\t"
+
+#define FDASSERT(condition, message)   \
+  if (!(condition)) {                  \
+    FDERROR << message << std::endl;   \
+    std::abort();                      \
+  }
+
+}  // namespace fastdeploy
diff --git a/fastdeploy/version.py b/fastdeploy/version.py
new file mode 100644
index 0000000000..57e083a924
--- /dev/null
+++ b/fastdeploy/version.py
@@ -0,0 +1,7 @@
+# This file is generated by setup.py. DO NOT EDIT!
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+version = '0.3.0'
+git_version = 'e388f48a0d6b97fc09dd7bd64c4aaa0f5687690c'
diff --git a/fastdeploy/vision.h b/fastdeploy/vision.h
new file mode 100644
index 0000000000..ca2b9a618a
--- /dev/null
+++ b/fastdeploy/vision.h
@@ -0,0 +1,22 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+
+#include "fastdeploy/core/config.h"
+#ifdef ENABLE_VISION
+#include "fastdeploy/vision/ppcls/model.h"
+#include "fastdeploy/vision/ultralytics/yolov5.h"
+#endif
+
+#include "fastdeploy/vision/visualize/visualize.h"
diff --git a/fastdeploy/vision/AddModel.md b/fastdeploy/vision/AddModel.md
new file mode 100644
index 0000000000..30080bd5e3
--- /dev/null
+++ b/fastdeploy/vision/AddModel.md
@@ -0,0 +1,3 @@
+# How to Add a Model
+
+This document uses [yolov5](https://github.com/ultralytics/yolov5) as an example to explain how to add support for a new model.
diff --git a/fastdeploy/vision/__init__.py b/fastdeploy/vision/__init__.py
new file mode 100644
index 0000000000..810b23cd3d
--- /dev/null
+++ b/fastdeploy/vision/__init__.py
@@ -0,0 +1,19 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import absolute_import + +from . import evaluation +from . import ppcls +from . import ultralytics +from . import visualize diff --git a/fastdeploy/vision/common/processors/base.cc b/fastdeploy/vision/common/processors/base.cc new file mode 100644 index 0000000000..d770522d82 --- /dev/null +++ b/fastdeploy/vision/common/processors/base.cc @@ -0,0 +1,61 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/vision/common/processors/base.h" +#include "fastdeploy/utils/utils.h" + +namespace fastdeploy { +namespace vision { + +ProcLib Processor::default_lib = ProcLib::DEFAULT; + +bool Processor::CpuRun(Mat* mat) { + FDERROR << "Unimplemented CpuRun." << std::endl; + return false; +} + +#ifdef ENABLE_OPENCV_CUDA +bool Processor::GpuRun(Mat* mat) { + FDERROR << "Unimplemented GpuRun." << std::endl; + return false; +} +#endif + +bool Processor::operator()(Mat* mat, ProcLib lib) { + // if default_lib is set + // then use default_lib + ProcLib target = lib; + if (default_lib != ProcLib::DEFAULT) { + target = default_lib; + } + + if (target == ProcLib::OPENCV_CUDA) { +#ifdef ENABLE_OPENCV_CUDA + bool ret = GpuRun(mat); + mat->device = Device::GPU; + return ret; +#else + FDERROR + << "OpenCV is not compiled with CUDA, cannot process image with CUDA." + << std::endl; + return false; +#endif + } + bool ret = CpuRun(mat); + mat->device = Device::CPU; + return ret; +} + +} // namespace vision +} // namespace fastdeploy diff --git a/fastdeploy/vision/common/processors/base.h b/fastdeploy/vision/common/processors/base.h new file mode 100644 index 0000000000..d4138864ac --- /dev/null +++ b/fastdeploy/vision/common/processors/base.h @@ -0,0 +1,48 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
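Given the dispatch in `Processor::operator()` above, a new image op only has to override `CpuRun` (and optionally `GpuRun`); the layout/device bookkeeping is handled by the base class. A skeletal sketch with a hypothetical no-op processor, only to show the extension points:

```cpp
#include "fastdeploy/vision/common/processors/base.h"

namespace fastdeploy {
namespace vision {
// Hypothetical processor, not part of this patch.
class Identity : public Processor {
 public:
  std::string Name() override { return "Identity"; }
  bool CpuRun(Mat* mat) override {
    // Real processors mutate mat->GetCpuMat() here and keep the
    // width/height/layout fields consistent afterwards.
    return true;
  }
};
}  // namespace vision
}  // namespace fastdeploy
```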
+
+#pragma once
+
+#include "fastdeploy/utils/utils.h"
+#include "fastdeploy/vision/common/processors/mat.h"
+#include "opencv2/highgui/highgui.hpp"
+#include "opencv2/imgproc/imgproc.hpp"
+
+namespace fastdeploy {
+namespace vision {
+
+enum ProcLib { DEFAULT, OPENCV_CPU, OPENCV_CUDA };
+
+class Processor {
+ public:
+  // default_lib has the highest priority:
+  // if this flag is set to anything other than DEFAULT, all the
+  // functions in `Processor` are forced to use default_lib.
+  static ProcLib default_lib;
+
+  // virtual bool ShapeInfer(const std::vector<int>& in_shape,
+  //                         std::vector<int>* out_shape) = 0;
+  virtual std::string Name() = 0;
+  virtual bool CpuRun(Mat* mat);
+#ifdef ENABLE_OPENCV_CUDA
+  virtual bool GpuRun(Mat* mat);
+#endif
+
+  virtual bool operator()(Mat* mat, ProcLib lib = ProcLib::OPENCV_CPU);
+};
+
+}  // namespace vision
+}  // namespace fastdeploy
diff --git a/fastdeploy/vision/common/processors/cast.cc b/fastdeploy/vision/common/processors/cast.cc
new file mode 100644
index 0000000000..2f8a0993ec
--- /dev/null
+++ b/fastdeploy/vision/common/processors/cast.cc
@@ -0,0 +1,56 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/vision/common/processors/cast.h"
+
+namespace fastdeploy {
+namespace vision {
+
+bool Cast::CpuRun(Mat* mat) {
+  if (mat->layout != Layout::CHW) {
+    FDERROR << "Cast: The input data must be Layout::CHW format!" << std::endl;
+    return false;
+  }
+  cv::Mat* im = mat->GetCpuMat();
+  if (dtype_ == "float") {
+    im->convertTo(*im, CV_32FC(im->channels()));
+  } else if (dtype_ == "double") {
+    im->convertTo(*im, CV_64FC(im->channels()));
+  }
+  return true;
+}
+
+#ifdef ENABLE_OPENCV_CUDA
+bool Cast::GpuRun(Mat* mat) {
+  if (mat->layout != Layout::CHW) {
+    FDERROR << "Cast: The input data must be Layout::CHW format!" << std::endl;
+    return false;
+  }
+  cv::cuda::GpuMat* im = mat->GetGpuMat();
+  if (dtype_ == "float") {
+    im->convertTo(*im, CV_32FC(im->channels()));
+  } else if (dtype_ == "double") {
+    im->convertTo(*im, CV_64FC(im->channels()));
+  }
+  return true;
+}
+#endif
+
+bool Cast::Run(Mat* mat, const std::string& dtype, ProcLib lib) {
+  auto c = Cast(dtype);
+  return c(mat, lib);
+}
+
+}  // namespace vision
+}  // namespace fastdeploy
diff --git a/fastdeploy/vision/common/processors/cast.h b/fastdeploy/vision/common/processors/cast.h
new file mode 100644
index 0000000000..1111f08a6e
--- /dev/null
+++ b/fastdeploy/vision/common/processors/cast.h
@@ -0,0 +1,37 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "fastdeploy/vision/common/processors/base.h" + +namespace fastdeploy { +namespace vision { + +class Cast : public Processor { + public: + explicit Cast(const std::string& dtype = "float") : dtype_(dtype) {} + bool CpuRun(Mat* mat); +#ifdef ENABLE_OPENCV_CUDA + bool GpuRun(Mat* mat); +#endif + std::string Name() { return "Cast"; } + static bool Run(Mat* mat, const std::string& dtype, + ProcLib lib = ProcLib::OPENCV_CPU); + + private: + std::string dtype_; +}; +} // namespace vision +} // namespace fastdeploy diff --git a/fastdeploy/vision/common/processors/center_crop.cc b/fastdeploy/vision/common/processors/center_crop.cc new file mode 100644 index 0000000000..27b86ca2d0 --- /dev/null +++ b/fastdeploy/vision/common/processors/center_crop.cc @@ -0,0 +1,63 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
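Each processor exposes the same one-shot `Run` convenience seen in `Cast` above; a short usage sketch:

```cpp
#include "fastdeploy/vision/common/processors/cast.h"

void CastDemo(fastdeploy::vision::Mat* mat) {
  // One-shot form; equivalent to constructing Cast("float") and invoking it.
  fastdeploy::vision::Cast::Run(mat, "float");
  // Note the layout check in CpuRun: the Mat is expected to already be in
  // Layout::CHW when Cast runs (i.e. after HWC2CHW).
}
```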
+
+#include "fastdeploy/vision/common/processors/center_crop.h"
+
+namespace fastdeploy {
+namespace vision {
+
+bool CenterCrop::CpuRun(Mat* mat) {
+  cv::Mat* im = mat->GetCpuMat();
+  int height = static_cast<int>(im->rows);
+  int width = static_cast<int>(im->cols);
+  if (height < height_ || width < width_) {
+    FDERROR << "[CenterCrop] Image size less than crop size" << std::endl;
+    return false;
+  }
+  int offset_x = static_cast<int>((width - width_) / 2);
+  int offset_y = static_cast<int>((height - height_) / 2);
+  cv::Rect crop_roi(offset_x, offset_y, width_, height_);
+  *im = (*im)(crop_roi);
+  mat->SetWidth(width_);
+  mat->SetHeight(height_);
+  return true;
+}
+
+#ifdef ENABLE_OPENCV_CUDA
+bool CenterCrop::GpuRun(Mat* mat) {
+  cv::cuda::GpuMat* im = mat->GetGpuMat();
+  int height = static_cast<int>(im->rows);
+  int width = static_cast<int>(im->cols);
+  if (height < height_ || width < width_) {
+    FDERROR << "[CenterCrop] Image size less than crop size" << std::endl;
+    return false;
+  }
+  int offset_x = static_cast<int>((width - width_) / 2);
+  int offset_y = static_cast<int>((height - height_) / 2);
+  cv::Rect crop_roi(offset_x, offset_y, width_, height_);
+  *im = (*im)(crop_roi);
+  mat->SetWidth(width_);
+  mat->SetHeight(height_);
+  return true;
+}
+#endif
+
+bool CenterCrop::Run(Mat* mat, const int& width, const int& height,
+                     ProcLib lib) {
+  auto c = CenterCrop(width, height);
+  return c(mat, lib);
+}
+
+}  // namespace vision
+}  // namespace fastdeploy
diff --git a/fastdeploy/vision/common/processors/center_crop.h b/fastdeploy/vision/common/processors/center_crop.h
new file mode 100644
index 0000000000..86ad0e20d9
--- /dev/null
+++ b/fastdeploy/vision/common/processors/center_crop.h
@@ -0,0 +1,40 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "fastdeploy/vision/common/processors/base.h"
+
+namespace fastdeploy {
+namespace vision {
+
+class CenterCrop : public Processor {
+ public:
+  CenterCrop(int width, int height) : height_(height), width_(width) {}
+  bool CpuRun(Mat* mat);
+#ifdef ENABLE_OPENCV_CUDA
+  bool GpuRun(Mat* mat);
+#endif
+  std::string Name() { return "CenterCrop"; }
+
+  static bool Run(Mat* mat, const int& width, const int& height,
+                  ProcLib lib = ProcLib::OPENCV_CPU);
+
+ private:
+  int height_;
+  int width_;
+};
+
+}  // namespace vision
+}  // namespace fastdeploy
diff --git a/fastdeploy/vision/common/processors/color_space_convert.cc b/fastdeploy/vision/common/processors/color_space_convert.cc
new file mode 100644
index 0000000000..bcdaf365a4
--- /dev/null
+++ b/fastdeploy/vision/common/processors/color_space_convert.cc
@@ -0,0 +1,58 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/vision/common/processors/color_space_convert.h" + +namespace fastdeploy { +namespace vision { +bool BGR2RGB::CpuRun(Mat* mat) { + cv::Mat* im = mat->GetCpuMat(); + cv::cvtColor(*im, *im, cv::COLOR_BGR2RGB); + return true; +} + +#ifdef ENABLE_OPENCV_CUDA +bool BGR2RGB::GpuRun(Mat* mat) { + cv::cuda::GpuMat* im = mat->GetGpuMat(); + cv::cuda::cvtColor(*im, *im, cv::COLOR_BGR2RGB); + return true; +} +#endif + +bool RGB2BGR::CpuRun(Mat* mat) { + cv::Mat* im = mat->GetCpuMat(); + cv::cvtColor(*im, *im, cv::COLOR_RGB2BGR); + return true; +} + +#ifdef ENABLE_OPENCV_CUDA +bool RGB2BGR::GpuRun(Mat* mat) { + cv::cuda::GpuMat* im = mat->GetGpuMat(); + cv::cuda::cvtColor(*im, *im, cv::COLOR_RGB2BGR); + return true; +} +#endif + +bool BGR2RGB::Run(Mat* mat, ProcLib lib) { + auto b = BGR2RGB(); + return b(mat, lib); +} + +bool RGB2BGR::Run(Mat* mat, ProcLib lib) { + auto r = RGB2BGR(); + return r(mat, lib); +} + +} // namespace vision +} // namespace fastdeploy diff --git a/fastdeploy/vision/common/processors/color_space_convert.h b/fastdeploy/vision/common/processors/color_space_convert.h new file mode 100644 index 0000000000..472bcf16d0 --- /dev/null +++ b/fastdeploy/vision/common/processors/color_space_convert.h @@ -0,0 +1,44 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "fastdeploy/vision/common/processors/base.h" + +namespace fastdeploy { +namespace vision { + +class BGR2RGB : public Processor { + public: + bool CpuRun(Mat* mat); +#ifdef ENABLE_OPENCV_CUDA + bool GpuRun(Mat* mat); +#endif + virtual std::string Name() { return "BGR2RGB"; } + + static bool Run(Mat* mat, ProcLib lib = ProcLib::OPENCV_CPU); +}; + +class RGB2BGR : public Processor { + public: + bool CpuRun(Mat* mat); +#ifdef ENABLE_OPENCV_CUDA + bool GpuRun(Mat* mat); +#endif + std::string Name() { return "RGB2BGR"; } + + static bool Run(Mat* mat, ProcLib lib = ProcLib::OPENCV_CPU); +}; +} // namespace vision +} // namespace fastdeploy diff --git a/fastdeploy/vision/common/processors/hwc2chw.cc b/fastdeploy/vision/common/processors/hwc2chw.cc new file mode 100644 index 0000000000..5bea87e18d --- /dev/null +++ b/fastdeploy/vision/common/processors/hwc2chw.cc @@ -0,0 +1,75 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/vision/common/processors/hwc2chw.h" + +namespace fastdeploy { +namespace vision { +bool HWC2CHW::CpuRun(Mat* mat) { + if (mat->layout != Layout::HWC) { + FDERROR << "HWC2CHW: The input data is not Layout::HWC format!" + << std::endl; + return false; + } + cv::Mat* im = mat->GetCpuMat(); + cv::Mat im_clone = im->clone(); + int rh = im->rows; + int rw = im->cols; + int rc = im->channels(); + + // float* data = reinterpret_cast(im->data); + for (int i = 0; i < rc; ++i) { + // cv::extractChannel(im_clone, cv::Mat(rh, rw, im->type() % 8, data + i + // * rh * rw), + // i); + cv::extractChannel( + im_clone, + cv::Mat(rh, rw, im->type() % 8, + im->ptr() + i * rh * rw * FDDataTypeSize(mat->Type())), + i); + } + mat->layout = Layout::CHW; + return true; +} + +#ifdef ENABLE_OPENCV_CUDA +bool HWC2CHW::GpuRun(Mat* mat) { + if (mat->layout != Layout::HWC) { + FDERROR << "HWC2CHW: The input data is not Layout::HWC format!" + << std::endl; + return false; + } + cv::cuda::GpuMat* im = mat->GetGpuMat(); + cv::cuda::GpuMat im_clone = im->clone(); + int rh = im->rows; + int rw = im->cols; + int rc = im->channels(); + int num_pixels = rh * rw; + std::vector channels{ + cv::cuda::GpuMat(rh, rw, im->type() % 8, &(im->ptr()[0])), + cv::cuda::GpuMat(rh, rw, im->type() % 8, &(im->ptr()[num_pixels])), + cv::cuda::GpuMat(rh, rw, im->type() % 8, &(im->ptr()[num_pixels * 2]))}; + cv::cuda::split(im_clone, channels); + mat->layout = Layout::CHW; + return true; +} +#endif + +bool HWC2CHW::Run(Mat* mat, ProcLib lib) { + auto h = HWC2CHW(); + return h(mat, lib); +} + +} // namespace vision +} // namespace fastdeploy diff --git a/fastdeploy/vision/common/processors/hwc2chw.h b/fastdeploy/vision/common/processors/hwc2chw.h new file mode 100644 index 0000000000..56fa3ede8c --- /dev/null +++ b/fastdeploy/vision/common/processors/hwc2chw.h @@ -0,0 +1,33 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include "fastdeploy/vision/common/processors/base.h" + +namespace fastdeploy { +namespace vision { + +class HWC2CHW : public Processor { + public: + bool CpuRun(Mat* mat); +#ifdef ENABLE_OPENCV_CUDA + bool GpuRun(Mat* mat); +#endif + std::string Name() { return "HWC2CHW"; } + + static bool Run(Mat* mat, ProcLib lib = ProcLib::OPENCV_CPU); +}; +} // namespace vision +} // namespace fastdeploy diff --git a/fastdeploy/vision/common/processors/mat.cc b/fastdeploy/vision/common/processors/mat.cc new file mode 100644 index 0000000000..2afffa416e --- /dev/null +++ b/fastdeploy/vision/common/processors/mat.cc @@ -0,0 +1,117 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "fastdeploy/vision/common/processors/mat.h" +#include "fastdeploy/utils/utils.h" +namespace fastdeploy { +namespace vision { + +#ifdef ENABLE_OPENCV_CUDA +cv::cuda::GpuMat* Mat::GetGpuMat() { + if (device == Device::CPU) { + gpu_mat.upload(cpu_mat); + } + return &gpu_mat; +} +#endif + +cv::Mat* Mat::GetCpuMat() { +#ifdef ENABLE_OPENCV_CUDA + if (device == Device::GPU) { + gpu_mat.download(cpu_mat); + } +#endif + return &cpu_mat; +} + +void Mat::ShareWithTensor(FDTensor* tensor) { + if (device == Device::GPU) { +#ifdef ENABLE_OPENCV_CUDA + tensor->SetExternalData({Channels(), Height(), Width()}, Type(), + GetGpuMat()->ptr()); + tensor->device = Device::GPU; +#endif + } else { + tensor->SetExternalData({Channels(), Height(), Width()}, Type(), + GetCpuMat()->ptr()); + tensor->device = Device::CPU; + } + if (layout == Layout::HWC) { + tensor->shape = {Height(), Width(), Channels()}; + } +} + +bool Mat::CopyToTensor(FDTensor* tensor) { + cv::Mat* im = GetCpuMat(); + int total_bytes = im->total() * im->elemSize(); + if (total_bytes != tensor->Nbytes()) { + FDERROR << "While copy Mat to Tensor, requires the memory size be same, " + "but now size of Tensor = " + << tensor->Nbytes() << ", size of Mat = " << total_bytes << "." 
+ << std::endl; + return false; + } + memcpy(tensor->MutableData(), im->ptr(), im->total() * im->elemSize()); + return true; +} + +void Mat::PrintInfo(const std::string& flag) { + cv::Mat* im = GetCpuMat(); + cv::Scalar mean = cv::mean(*im); + std::cout << flag << ": " + << "Channel=" << Channels() << ", height=" << Height() + << ", width=" << Width() << ", mean="; + for (int i = 0; i < Channels(); ++i) { + std::cout << mean[i] << " "; + } + std::cout << std::endl; +} + +FDDataType Mat::Type() { + int type = -1; + if (device == Device::GPU) { +#ifdef ENABLE_OPENCV_CUDA + type = gpu_mat.type(); +#endif + } else { + type = cpu_mat.type(); + } + if (type < 0) { + FDASSERT(false, + "While calling Mat::Type(), get negative value, which is not " + "expected!."); + } + type = type % 8; + if (type == 0) { + return FDDataType::UINT8; + } else if (type == 1) { + return FDDataType::INT8; + } else if (type == 2) { + FDASSERT(false, "While calling Mat::Type(), get UINT16 type which is not " + "supported now."); + } else if (type == 3) { + return FDDataType::INT16; + } else if (type == 4) { + return FDDataType::INT32; + } else if (type == 5) { + return FDDataType::FP32; + } else if (type == 6) { + return FDDataType::FP64; + } else { + FDASSERT(false, "While calling Mat::Type(), get type = " + + std::to_string(type) + ", which is not expected!."); + } +} + +} // namespace vision +} // namespace fastdeploy diff --git a/fastdeploy/vision/common/processors/mat.h b/fastdeploy/vision/common/processors/mat.h new file mode 100644 index 0000000000..616a4aabee --- /dev/null +++ b/fastdeploy/vision/common/processors/mat.h @@ -0,0 +1,80 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
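Why the `type % 8` in `Mat::Type()` above works: OpenCV packs the depth in the low 3 bits of a type code, `CV_MAKETYPE(depth, cn) = depth + (cn - 1) * 8`, so taking the remainder strips the channel count. A compile-time check sketch:

```cpp
#include <opencv2/core/core.hpp>

// Depth lives in the low 3 bits of an OpenCV type code.
static_assert(CV_32FC3 % 8 == CV_32F, "depth extraction via %8");
static_assert(CV_8UC1 % 8 == CV_8U, "depth extraction via %8");
```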
+#pragma once +#include "fastdeploy/core/fd_tensor.h" +#include "opencv2/core/core.hpp" + +#ifdef ENABLE_OPENCV_CUDA +#include "opencv2/core/cuda.hpp" +#include "opencv2/cudaarithm.hpp" +#include "opencv2/cudaimgproc.hpp" +#include "opencv2/cudawarping.hpp" +#endif + +namespace fastdeploy { +namespace vision { + +enum Layout { HWC, CHW }; + +struct Mat { + explicit Mat(cv::Mat& mat) { + cpu_mat = mat; + device = Device::CPU; + layout = Layout::HWC; + height = cpu_mat.rows; + width = cpu_mat.cols; + channels = cpu_mat.channels(); + } + + private: + int channels; + int height; + int width; + cv::Mat cpu_mat; +#ifdef ENABLE_OPENCV_CUDA + cv::cuda::GpuMat gpu_mat; +#endif + + public: +#ifdef ENABLE_OPENCV_CUDA + cv::cuda::GpuMat* GetGpuMat(); +#endif + cv::Mat* GetCpuMat(); + + FDDataType Type(); + int Channels() const { return channels; } + int Width() const { return width; } + int Height() const { return height; } + void SetChannels(int s) { channels = s; } + void SetWidth(int w) { width = w; } + void SetHeight(int h) { height = h; } + + // Transfer the vision::Mat to FDTensor + void ShareWithTensor(FDTensor* tensor); + // Only support copy to cpu tensor now + bool CopyToTensor(FDTensor* tensor); + + // debug functions + // TODO(jiangjiajun) Develop a right process pipeline with c++ is not a easy + // things + // Will add more debug function here to help debug processed image + // This function will print shape / mean of each channels of the Mat + void PrintInfo(const std::string& flag); + + Layout layout = Layout::HWC; + Device device = Device::CPU; +}; + +} // namespace vision +} // namespace fastdeploy diff --git a/fastdeploy/vision/common/processors/normalize.cc b/fastdeploy/vision/common/processors/normalize.cc new file mode 100644 index 0000000000..b75406070c --- /dev/null +++ b/fastdeploy/vision/common/processors/normalize.cc @@ -0,0 +1,88 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
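A short sketch of handing a `vision::Mat` to the runtime via the two transfer paths declared above (the function name is ours; lifetime caveat applies to the zero-copy path):

```cpp
#include "fastdeploy/vision/common/processors/mat.h"

void MatToTensorDemo(cv::Mat& image, fastdeploy::FDTensor* tensor) {
  fastdeploy::vision::Mat mat(image);
  // Zero-copy: the tensor borrows the Mat's buffer, so the Mat must
  // outlive every use of the tensor.
  mat.ShareWithTensor(tensor);
  // Copying alternative (CPU only, per the comment in mat.h), which
  // requires the tensor to already have the matching byte size:
  // mat.CopyToTensor(tensor);
}
```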
+
+#include "fastdeploy/vision/common/processors/normalize.h"
+
+namespace fastdeploy {
+namespace vision {
+Normalize::Normalize(const std::vector<float>& mean,
+                     const std::vector<float>& std, bool is_scale,
+                     const std::vector<float>& min,
+                     const std::vector<float>& max) {
+  FDASSERT(mean.size() == std.size(),
+           "Normalize: requires the size of mean equal to the size of std.");
+  std::vector<double> mean_(mean.begin(), mean.end());
+  std::vector<double> std_(std.begin(), std.end());
+  std::vector<double> min_(mean.size(), 0.0);
+  std::vector<double> max_(mean.size(), 255.0);
+  if (min.size() != 0) {
+    FDASSERT(
+        min.size() == mean.size(),
+        "Normalize: while min is defined, requires the size of min equal to "
+        "the size of mean.");
+    min_.assign(min.begin(), min.end());
+  }
+  if (max.size() != 0) {
+    FDASSERT(
+        max.size() == mean.size(),
+        "Normalize: while max is defined, requires the size of max equal to "
+        "the size of mean.");
+    max_.assign(max.begin(), max.end());
+  }
+  for (size_t c = 0; c < mean_.size(); ++c) {
+    double alpha = 1.0;
+    if (is_scale) {
+      alpha /= (max_[c] - min_[c]);
+    }
+    double beta = -1.0 * (mean_[c] + min_[c] * alpha) / std_[c];
+    alpha /= std_[c];
+    alpha_.push_back(alpha);
+    beta_.push_back(beta);
+  }
+}
+
+bool Normalize::CpuRun(Mat* mat) {
+  cv::Mat* im = mat->GetCpuMat();
+  std::vector<cv::Mat> split_im;
+  cv::split(*im, split_im);
+  for (int c = 0; c < im->channels(); c++) {
+    split_im[c].convertTo(split_im[c], CV_32FC1, alpha_[c], beta_[c]);
+  }
+  cv::merge(split_im, *im);
+  return true;
+}
+
+#ifdef ENABLE_OPENCV_CUDA
+bool Normalize::GpuRun(Mat* mat) {
+  cv::cuda::GpuMat* im = mat->GetGpuMat();
+  std::vector<cv::cuda::GpuMat> split_im;
+  cv::cuda::split(*im, split_im);
+  for (int c = 0; c < im->channels(); c++) {
+    split_im[c].convertTo(split_im[c], CV_32FC1, alpha_[c], beta_[c]);
+  }
+  cv::cuda::merge(split_im, *im);
+  return true;
+}
+#endif
+
+bool Normalize::Run(Mat* mat, const std::vector<float>& mean,
+                    const std::vector<float>& std, bool is_scale,
+                    const std::vector<float>& min,
+                    const std::vector<float>& max, ProcLib lib) {
+  auto n = Normalize(mean, std, is_scale, min, max);
+  return n(mat, lib);
+}
+
+}  // namespace vision
+}  // namespace fastdeploy
diff --git a/fastdeploy/vision/common/processors/normalize.h b/fastdeploy/vision/common/processors/normalize.h
new file mode 100644
index 0000000000..eeb839d024
--- /dev/null
+++ b/fastdeploy/vision/common/processors/normalize.h
@@ -0,0 +1,54 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
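A worked example of the constructor's folded coefficients above: with `is_scale = true` and the defaults `min = 0`, `max = 255`, the code computes `alpha = 1 / ((max - min) * std)` and `beta = -(mean + min / (max - min)) / std`, so at run time each pixel costs a single multiply-add. Plugging in the usual ImageNet constants for one channel (used here purely as an example):

```cpp
#include <cstdio>

// result = x * alpha + beta  ==  ((x / 255) - mean) / std
int main() {
  double mean = 0.485, stddev = 0.229, minv = 0.0, maxv = 255.0;
  double alpha = 1.0 / (maxv - minv);             // is_scale step
  double beta = -(mean + minv * alpha) / stddev;  // shift, folded
  alpha /= stddev;                                // scale, folded
  std::printf("alpha=%f beta=%f\n", alpha, beta);
  // alpha ~= 0.017124, beta ~= -2.117904
  return 0;
}
```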
+
+#pragma once
+
+#include "fastdeploy/vision/common/processors/base.h"
+
+namespace fastdeploy {
+namespace vision {
+class Normalize : public Processor {
+ public:
+  Normalize(const std::vector<float>& mean, const std::vector<float>& std,
+            bool is_scale = true,
+            const std::vector<float>& min = std::vector<float>(),
+            const std::vector<float>& max = std::vector<float>());
+  bool CpuRun(Mat* mat);
+#ifdef ENABLE_OPENCV_CUDA
+  bool GpuRun(Mat* mat);
+#endif
+  std::string Name() { return "Normalize"; }
+
+  // When using Normalize, it is recommended not to call this static
+  // function, which has to recompute result = ((mat / 255) - mean) / std
+  // on every call. If we instead construct the processor once,
+  // ```
+  // auto norm = Normalize(...)
+  // norm(mat)
+  // ```
+  // the precomputation happens in the constructor, and `norm(mat)` only
+  // needs to compute result = mat * alpha + beta, which saves lots of time.
+  static bool Run(Mat* mat, const std::vector<float>& mean,
+                  const std::vector<float>& std, bool is_scale = true,
+                  const std::vector<float>& min = std::vector<float>(),
+                  const std::vector<float>& max = std::vector<float>(),
+                  ProcLib lib = ProcLib::OPENCV_CPU);
+
+ private:
+  std::vector<float> alpha_;
+  std::vector<float> beta_;
+};
+}  // namespace vision
+}  // namespace fastdeploy
diff --git a/fastdeploy/vision/common/processors/pad.cc b/fastdeploy/vision/common/processors/pad.cc
new file mode 100644
index 0000000000..3b26d28bc6
--- /dev/null
+++ b/fastdeploy/vision/common/processors/pad.cc
@@ -0,0 +1,100 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/vision/common/processors/pad.h"
+
+namespace fastdeploy {
+namespace vision {
+
+bool Pad::CpuRun(Mat* mat) {
+  if (mat->layout != Layout::HWC) {
+    FDERROR << "Pad: The input data must be Layout::HWC format!" << std::endl;
+    return false;
+  }
+  if (mat->Channels() > 4) {
+    FDERROR << "Pad: Only support channels <= 4." << std::endl;
+    return false;
+  }
+  if (static_cast<size_t>(mat->Channels()) != value_.size()) {
+    FDERROR << "Pad: Requires input channels to equal the size of padding "
+               "values, but now channels = "
+            << mat->Channels()
+            << ", the size of padding values = " << value_.size() << "."
+            << std::endl;
+    return false;
+  }
+  cv::Mat* im = mat->GetCpuMat();
+  cv::Scalar value;
+  if (value_.size() == 1) {
+    value = cv::Scalar(value_[0]);
+  } else if (value_.size() == 2) {
+    value = cv::Scalar(value_[0], value_[1]);
+  } else if (value_.size() == 3) {
+    value = cv::Scalar(value_[0], value_[1], value_[2]);
+  } else {
+    value = cv::Scalar(value_[0], value_[1], value_[2], value_[3]);
+  }
+  cv::copyMakeBorder(*im, *im, top_, bottom_, left_, right_,
+                     cv::BORDER_CONSTANT, value);
+  mat->SetHeight(im->rows);
+  mat->SetWidth(im->cols);
+  return true;
+}
+
+#ifdef ENABLE_OPENCV_CUDA
+bool Pad::GpuRun(Mat* mat) {
+  if (mat->layout != Layout::HWC) {
+    FDERROR << "Pad: The input data must be Layout::HWC format!" << std::endl;
+    return false;
+  }
+  if (mat->Channels() > 4) {
+    FDERROR << "Pad: Only support channels <= 4."
<< std::endl; + return false; + } + if (mat->Channels() != value_.size()) { + FDERROR << "Pad: Require input channels equals to size of padding value, " + "but now channels = " + << mat->Channels() + << ", the size of padding values = " << value_.size() << "." + << std::endl; + return false; + } + cv::cuda::GpuMat* im = mat->GetGpuMat(); + cv::Scalar value; + if (value_.size() == 1) { + value = cv::Scalar(value_[0]); + } else if (value_.size() == 2) { + value = cv::Scalar(value_[0], value_[1]); + } else if (value_.size() == 3) { + value = cv::Scalar(value_[0], value_[1], value_[2]); + } else { + value = cv::Scalar(value_[0], value_[1], value_[2], value_[3]); + } + cv::cuda::copyMakeBorder(*im, *im, top_, bottom_, left_, right_, + cv::BORDER_CONSTANT, value); + mat->SetHeight(im->rows); + mat->SetWidth(im->cols); + return true; +} +#endif + +bool Pad::Run(Mat* mat, const int& top, const int& bottom, const int& left, + const int& right, const std::vector& value, + ProcLib lib) { + auto p = Pad(top, bottom, left, right, value); + return p(mat, lib); +} + +} // namespace vision +} // namespace fastdeploy diff --git a/fastdeploy/vision/common/processors/pad.h b/fastdeploy/vision/common/processors/pad.h new file mode 100644 index 0000000000..1103659603 --- /dev/null +++ b/fastdeploy/vision/common/processors/pad.h @@ -0,0 +1,50 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "fastdeploy/vision/common/processors/base.h" + +namespace fastdeploy { +namespace vision { + +class Pad : public Processor { + public: + Pad(int top, int bottom, int left, int right, + const std::vector& value) { + top_ = top; + bottom_ = bottom; + left_ = left; + right_ = right; + value_ = value; + } + bool CpuRun(Mat* mat); +#ifdef ENABLE_OPENCV_CUDA + bool GpuRun(Mat* mat); +#endif + std::string Name() { return "Pad"; } + + static bool Run(Mat* mat, const int& top, const int& bottom, const int& left, + const int& right, const std::vector& value, + ProcLib lib = ProcLib::OPENCV_CPU); + + private: + int top_; + int bottom_; + int left_; + int right_; + std::vector value_; +}; +} // namespace vision +} // namespace fastdeploy diff --git a/fastdeploy/vision/common/processors/resize.cc b/fastdeploy/vision/common/processors/resize.cc new file mode 100644 index 0000000000..d6b8b9e2fc --- /dev/null +++ b/fastdeploy/vision/common/processors/resize.cc @@ -0,0 +1,90 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/vision/common/processors/resize.h" + +namespace fastdeploy { +namespace vision { + +bool Resize::CpuRun(Mat* mat) { + if (mat->layout != Layout::HWC) { + FDERROR << "Resize: The format of input is not HWC." << std::endl; + return false; + } + cv::Mat* im = mat->GetCpuMat(); + int origin_w = im->cols; + int origin_h = im->rows; + if (width_ > 0 && height_ > 0) { + if (use_scale_) { + float scale_w = width_ * 1.0 / origin_w; + float scale_h = height_ * 1.0 / origin_h; + cv::resize(*im, *im, cv::Size(0, 0), scale_w, scale_h, interp_); + } else { + cv::resize(*im, *im, cv::Size(width_, height_), 0, 0, interp_); + } + } else if (scale_w_ > 0 && scale_h_ > 0) { + cv::resize(*im, *im, cv::Size(0, 0), scale_w_, scale_h_, interp_); + } else { + FDERROR << "Resize: the parameters must satisfy (width > 0 && height > 0) " + "or (scale_w > 0 && scale_h > 0)." + << std::endl; + return false; + } + mat->SetWidth(im->cols); + mat->SetHeight(im->rows); + return true; +} + +#ifdef ENABLE_OPENCV_CUDA +bool Resize::GpuRun(Mat* mat) { + if (mat->layout != Layout::HWC) { + FDERROR << "Resize: The format of input is not HWC." << std::endl; + return false; + } + cv::cuda::GpuMat* im = mat->GetGpuMat(); + int origin_w = im->cols; + int origin_h = im->rows; + if (width_ > 0 && height_ > 0) { + if (use_scale_) { + float scale_w = width_ * 1.0 / origin_w; + float scale_h = height_ * 1.0 / origin_h; + cv::cuda::resize(*im, *im, cv::Size(0, 0), scale_w, scale_h, interp_); + } else { + cv::cuda::resize(*im, *im, cv::Size(width_, height_), 0, 0, interp_); + } + } else if (scale_w_ > 0 && scale_h_ > 0) { + cv::cuda::resize(*im, *im, cv::Size(0, 0), scale_w_, scale_h_, interp_); + } else { + FDERROR << "Resize: the parameters must satisfy (width > 0 && height > 0) " + "or (scale_w > 0 && scale_h > 0)." + << std::endl; + return false; + } + mat->SetWidth(im->cols); + mat->SetHeight(im->rows); + return true; +} +#endif + +bool Resize::Run(Mat* mat, int width, int height, float scale_w, float scale_h, + int interp, bool use_scale, ProcLib lib) { + if (mat->Height() == height && mat->Width() == width) { + return true; + } + auto r = Resize(width, height, scale_w, scale_h, interp, use_scale); + return r(mat, lib); +} + +} // namespace vision +} // namespace fastdeploy diff --git a/fastdeploy/vision/common/processors/resize.h b/fastdeploy/vision/common/processors/resize.h new file mode 100644 index 0000000000..137007997f --- /dev/null +++ b/fastdeploy/vision/common/processors/resize.h @@ -0,0 +1,53 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
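+
+// Resize supports two ways of specifying the target, as implemented in
+// resize.cc above: an absolute size (width > 0 && height > 0) or a pair of
+// scale ratios (scale_w > 0 && scale_h > 0). A sketch of typical usage (the
+// image path and sizes are only illustrative placeholders):
+//
+//   cv::Mat im = cv::imread("test.jpg"); // e.g. 1920x1080
+//   Mat mat(im);
+//   Resize::Run(&mat, 640, 640);         // resize to a fixed 640x640
+//   Resize::Run(&mat, -1, -1, 0.5, 0.5); // or: halve both sides instead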
+ +#pragma once + +#include "fastdeploy/vision/common/processors/base.h" + +namespace fastdeploy { +namespace vision { + +class Resize : public Processor { + public: + Resize(int width, int height, float scale_w = -1.0, float scale_h = -1.0, + int interp = 1, bool use_scale = false) { + width_ = width; + height_ = height; + scale_w_ = scale_w; + scale_h_ = scale_h; + interp_ = interp; + use_scale_ = use_scale; + } + + bool CpuRun(Mat* mat); +#ifdef ENABLE_OPENCV_CUDA + bool GpuRun(Mat* mat); +#endif + std::string Name() { return "Resize"; } + + static bool Run(Mat* mat, int width, int height, float scale_w = -1.0, + float scale_h = -1.0, int interp = 1, bool use_scale = false, + ProcLib lib = ProcLib::OPENCV_CPU); + + private: + int width_; + int height_; + float scale_w_ = -1.0; + float scale_h_ = -1.0; + int interp_ = 1; + bool use_scale_ = false; +}; +} // namespace vision +} // namespace fastdeploy diff --git a/fastdeploy/vision/common/processors/resize_by_short.cc b/fastdeploy/vision/common/processors/resize_by_short.cc new file mode 100644 index 0000000000..8e850425fc --- /dev/null +++ b/fastdeploy/vision/common/processors/resize_by_short.cc @@ -0,0 +1,76 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
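+
+// GenerateScale (defined below) picks the scale so that the short side of
+// the image matches target_size_, optionally capping the long side at
+// max_size_. For example (numbers are illustrative only): a 1000x500 image
+// with target_size = 224 gives scale = 224 / 500 = 0.448, i.e. roughly
+// 448x224; with max_size = 512, a 2000x500 image would first get
+// scale = 0.448, but 0.448 * 2000 = 896 > 512, so the scale is clamped to
+// 512 / 2000 = 0.256.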
+
+#include "fastdeploy/vision/common/processors/resize_by_short.h"
+
+namespace fastdeploy {
+namespace vision {
+
+bool ResizeByShort::CpuRun(Mat* mat) {
+  cv::Mat* im = mat->GetCpuMat();
+  int origin_w = im->cols;
+  int origin_h = im->rows;
+  double scale = GenerateScale(origin_w, origin_h);
+  if (use_scale_) {
+    cv::resize(*im, *im, cv::Size(), scale, scale, interp_);
+  } else {
+    int width = static_cast<int>(round(scale * im->cols));
+    int height = static_cast<int>(round(scale * im->rows));
+    cv::resize(*im, *im, cv::Size(width, height), 0, 0, interp_);
+  }
+  mat->SetWidth(im->cols);
+  mat->SetHeight(im->rows);
+  return true;
+}
+
+#ifdef ENABLE_OPENCV_CUDA
+bool ResizeByShort::GpuRun(Mat* mat) {
+  cv::cuda::GpuMat* im = mat->GetGpuMat();
+  int origin_w = im->cols;
+  int origin_h = im->rows;
+  double scale = GenerateScale(origin_w, origin_h);
+  im->convertTo(*im, CV_32FC(im->channels()));
+  if (use_scale_) {
+    cv::cuda::resize(*im, *im, cv::Size(), scale, scale, interp_);
+  } else {
+    int width = static_cast<int>(round(scale * im->cols));
+    int height = static_cast<int>(round(scale * im->rows));
+    cv::cuda::resize(*im, *im, cv::Size(width, height), 0, 0, interp_);
+  }
+  mat->SetWidth(im->cols);
+  mat->SetHeight(im->rows);
+  return true;
+}
+#endif
+
+double ResizeByShort::GenerateScale(const int origin_w, const int origin_h) {
+  int im_size_max = std::max(origin_w, origin_h);
+  int im_size_min = std::min(origin_w, origin_h);
+  double scale =
+      static_cast<double>(target_size_) / static_cast<double>(im_size_min);
+  if (max_size_ > 0) {
+    if (round(scale * im_size_max) > max_size_) {
+      scale =
+          static_cast<double>(max_size_) / static_cast<double>(im_size_max);
+    }
+  }
+  return scale;
+}
+
+bool ResizeByShort::Run(Mat* mat, int target_size, int interp, bool use_scale,
+                        int max_size, ProcLib lib) {
+  auto r = ResizeByShort(target_size, interp, use_scale, max_size);
+  return r(mat, lib);
+}
+} // namespace vision
+} // namespace fastdeploy
diff --git a/fastdeploy/vision/common/processors/resize_by_short.h b/fastdeploy/vision/common/processors/resize_by_short.h
new file mode 100644
index 0000000000..023748e9ea
--- /dev/null
+++ b/fastdeploy/vision/common/processors/resize_by_short.h
@@ -0,0 +1,49 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +#pragma once + +#include "fastdeploy/vision/common/processors/base.h" + +namespace fastdeploy { +namespace vision { + +class ResizeByShort : public Processor { + public: + ResizeByShort(int target_size, int interp = 1, bool use_scale = true, + int max_size = -1) { + target_size_ = target_size; + max_size_ = max_size; + interp_ = interp; + use_scale_ = use_scale; + } + bool CpuRun(Mat* mat); +#ifdef ENABLE_OPENCV_CUDA + bool GpuRun(Mat* mat); +#endif + std::string Name() { return "ResizeByShort"; } + + static bool Run(Mat* mat, int target_size, int interp = 1, + bool use_scale = true, int max_size = -1, + ProcLib lib = ProcLib::OPENCV_CPU); + + private: + double GenerateScale(const int origin_w, const int origin_h); + int target_size_; + int max_size_; + int interp_; + bool use_scale_; +}; +} // namespace vision +} // namespace fastdeploy diff --git a/fastdeploy/vision/common/processors/transform.h b/fastdeploy/vision/common/processors/transform.h new file mode 100644 index 0000000000..12eec8d72d --- /dev/null +++ b/fastdeploy/vision/common/processors/transform.h @@ -0,0 +1,24 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "fastdeploy/vision/common/processors/cast.h" +#include "fastdeploy/vision/common/processors/center_crop.h" +#include "fastdeploy/vision/common/processors/color_space_convert.h" +#include "fastdeploy/vision/common/processors/hwc2chw.h" +#include "fastdeploy/vision/common/processors/normalize.h" +#include "fastdeploy/vision/common/processors/pad.h" +#include "fastdeploy/vision/common/processors/resize.h" +#include "fastdeploy/vision/common/processors/resize_by_short.h" diff --git a/fastdeploy/vision/common/result.cc b/fastdeploy/vision/common/result.cc new file mode 100644 index 0000000000..d6331e357c --- /dev/null +++ b/fastdeploy/vision/common/result.cc @@ -0,0 +1,101 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
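+
+// Note on the Clear() implementations below: swapping with an empty vector
+// (std::vector<T>().swap(v)) releases the allocated capacity immediately,
+// whereas v.clear() alone would keep the memory reserved.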
+#include "fastdeploy/vision/common/result.h" + +namespace fastdeploy { +namespace vision { + +void ClassifyResult::Clear() { + std::vector().swap(label_ids); + std::vector().swap(scores); +} + +std::string ClassifyResult::Str() { + std::string out; + out = "ClassifyResult(\nlabel_ids: "; + for (size_t i = 0; i < label_ids.size(); ++i) { + out = out + std::to_string(label_ids[i]) + ", "; + } + out += "\nscores: "; + for (size_t i = 0; i < label_ids.size(); ++i) { + out = out + std::to_string(scores[i]) + ", "; + } + out += "\n)"; + return out; +} + +DetectionResult::DetectionResult(const DetectionResult& res) { + boxes.assign(res.boxes.begin(), res.boxes.end()); + scores.assign(res.scores.begin(), res.scores.end()); + label_ids.assign(res.label_ids.begin(), res.label_ids.end()); +} + +void DetectionResult::Clear() { + std::vector>().swap(boxes); + std::vector().swap(scores); + std::vector().swap(label_ids); +} + +void DetectionResult::Reserve(int size) { + boxes.reserve(size); + scores.reserve(size); + label_ids.reserve(size); +} + +void DetectionResult::Resize(int size) { + boxes.resize(size); + scores.resize(size); + label_ids.resize(size); +} + +void DetectionResult::Sort() { + for (size_t i = 0; i < scores.size(); ++i) { + float max_score = scores[i]; + float index = i; + for (size_t j = i + 1; j < scores.size(); ++j) { + if (max_score < scores[j]) { + max_score = scores[j]; + index = j; + } + } + if (i == index) { + continue; + } + float tmp_score = scores[i]; + scores[i] = scores[index]; + scores[index] = tmp_score; + int32_t tmp_label_id = label_ids[i]; + label_ids[i] = label_ids[index]; + label_ids[index] = tmp_label_id; + std::array tmp_box = boxes[i]; + boxes[i] = boxes[index]; + boxes[index] = tmp_box; + } +} + +std::string DetectionResult::Str() { + std::string out; + out = "DetectionResult: [xmin, ymin, xmax, ymax, score, label_id]\n"; + for (size_t i = 0; i < boxes.size(); ++i) { + out = out + std::to_string(boxes[i][0]) + "," + + std::to_string(boxes[i][1]) + ", " + std::to_string(boxes[i][2]) + + ", " + std::to_string(boxes[i][3]) + ", " + + std::to_string(scores[i]) + ", " + std::to_string(label_ids[i]) + + "\n"; + } + return out; +} + +} // namespace vision +} // namespace fastdeploy diff --git a/fastdeploy/vision/common/result.h b/fastdeploy/vision/common/result.h new file mode 100644 index 0000000000..eb548ba664 --- /dev/null +++ b/fastdeploy/vision/common/result.h @@ -0,0 +1,57 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#pragma once
+#include "fastdeploy/fastdeploy_model.h"
+#include "opencv2/core/core.hpp"
+
+namespace fastdeploy {
+namespace vision {
+enum ResultType { UNKNOWN, CLASSIFY, DETECTION, SEGMENTATION };
+
+struct FASTDEPLOY_DECL BaseResult {
+  ResultType type = ResultType::UNKNOWN;
+};
+
+struct FASTDEPLOY_DECL ClassifyResult : public BaseResult {
+  std::vector<int32_t> label_ids;
+  std::vector<float> scores;
+  ResultType type = ResultType::CLASSIFY;
+
+  void Clear();
+  std::string Str();
+};
+
+struct FASTDEPLOY_DECL DetectionResult : public BaseResult {
+  // box: xmin, ymin, xmax, ymax
+  std::vector<std::array<float, 4>> boxes;
+  std::vector<float> scores;
+  std::vector<int32_t> label_ids;
+  ResultType type = ResultType::DETECTION;
+
+  DetectionResult() {}
+  DetectionResult(const DetectionResult& res);
+
+  void Clear();
+
+  void Reserve(int size);
+
+  void Resize(int size);
+
+  void Sort();
+
+  std::string Str();
+};
+
+} // namespace vision
+} // namespace fastdeploy
diff --git a/fastdeploy/__main__.py b/fastdeploy/vision/evaluation/__init__.py
similarity index 89%
rename from fastdeploy/__main__.py
rename to fastdeploy/vision/evaluation/__init__.py
index 6abf5cf8f7..1158095ec5 100644
--- a/fastdeploy/__main__.py
+++ b/fastdeploy/vision/evaluation/__init__.py
@@ -11,8 +11,5 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-import fastdeploy
-
-if __name__ == "__main__":
-    fastdeploy.main()
+from __future__ import absolute_import
+from .classify import eval_classify
diff --git a/fastdeploy/vision/evaluation/classify.py b/fastdeploy/vision/evaluation/classify.py
new file mode 100644
index 0000000000..e9cc421fe0
--- /dev/null
+++ b/fastdeploy/vision/evaluation/classify.py
@@ -0,0 +1,57 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
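+
+# The label file passed to eval_classify is expected to contain one
+# "<image_name> <label_id>" pair per line, e.g.:
+#
+#     ILSVRC2012_val_00000001.JPEG 65
+#     ILSVRC2012_val_00000002.JPEG 970
+#
+# A sketch of typical usage (paths and model file names are placeholders):
+#
+#     import fastdeploy as fd
+#     model = fd.vision.ppcls.Model("model.pdmodel", "model.pdiparams",
+#                                   "inference_cls.yaml")
+#     acc = eval_classify(model, "val_images/", "val_list.txt", topk=5)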
+import numpy as np
+import fastdeploy as fd
+import cv2
+import os
+import re
+from tqdm import trange
+
+
+def topk_accuracy(topk_list, label_list):
+    match_array = np.logical_or.reduce(topk_list == label_list, axis=1)
+    topk_acc_score = match_array.sum() / match_array.shape[0]
+    return topk_acc_score
+
+
+def eval_classify(model, image_file_path, label_file_path, topk=5):
+    result_list = []
+    label_list = []
+    image_label_dict = {}
+    assert os.path.isdir(
+        image_file_path), "The image_file_path:{} is not a directory.".format(
+            image_file_path)
+    assert os.path.isfile(
+        label_file_path), "The label_file_path:{} is not a file.".format(
+            label_file_path)
+    assert isinstance(topk, int), "The topk:{} is not int type.".format(topk)
+    with open(label_file_path, 'r') as file:
+        lines = file.readlines()
+    for line in lines:
+        items = line.strip().split()
+        image_name = items[0]
+        label = items[1]
+        image_label_dict[image_name] = int(label)
+    images_num = len(image_label_dict)
+    for (image, label), i in zip(image_label_dict.items(),
+                                 trange(
+                                     images_num, desc='Inference Progress')):
+        label_list.append([label])
+        image_path = os.path.join(image_file_path, image)
+        im = cv2.imread(image_path)
+        result = model.predict(im, topk)
+        result_list.append(result.label_ids)
+
+    topk_acc_score = topk_accuracy(np.array(result_list), np.array(label_list))
+    return topk_acc_score
diff --git a/fastdeploy/vision/ppcls/__init__.py b/fastdeploy/vision/ppcls/__init__.py
new file mode 100644
index 0000000000..e27d1f47ae
--- /dev/null
+++ b/fastdeploy/vision/ppcls/__init__.py
@@ -0,0 +1,37 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+import logging
+import fastdeploy as fd
+from fastdeploy import C
+
+
+class Model(fd.FastDeployModel):
+    def __init__(self,
+                 model_file,
+                 params_file,
+                 config_file,
+                 backend_option=None,
+                 model_format=fd.Frontend.PADDLE):
+        super(Model, self).__init__(backend_option)
+
+        assert model_format == fd.Frontend.PADDLE, "PaddleClas only supports model format of Frontend.PADDLE now."
+        self._model = C.vision.ppcls.Model(model_file, params_file,
+                                           config_file, self._runtime_option,
+                                           model_format)
+        assert self.initialized, "PaddleClas model initialize failed."
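+
+    # predict returns a C.vision.ClassifyResult whose `label_ids` and
+    # `scores` hold the topk entries; a sketch of usage (values and the
+    # image path are illustrative):
+    #   result = model.predict(cv2.imread("test.jpg"), topk=5)
+    #   print(result.label_ids, result.scores)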
+
+    def predict(self, input_image, topk=1):
+        return self._model.predict(input_image, topk)
diff --git a/fastdeploy/vision/ppcls/model.cc b/fastdeploy/vision/ppcls/model.cc
new file mode 100644
index 0000000000..76bbed4d8e
--- /dev/null
+++ b/fastdeploy/vision/ppcls/model.cc
@@ -0,0 +1,140 @@
+
+#include "fastdeploy/vision/ppcls/model.h"
+#include "fastdeploy/vision/utils/utils.h"
+#include "yaml-cpp/yaml.h"
+
+namespace fastdeploy {
+namespace vision {
+namespace ppcls {
+
+Model::Model(const std::string& model_file, const std::string& params_file,
+             const std::string& config_file, const RuntimeOption& custom_option,
+             const Frontend& model_format) {
+  config_file_ = config_file;
+  valid_cpu_backends = {Backend::ORT, Backend::PDRT};
+  valid_gpu_backends = {Backend::ORT, Backend::PDRT};
+  runtime_option = custom_option;
+  runtime_option.model_format = model_format;
+  runtime_option.model_file = model_file;
+  runtime_option.params_file = params_file;
+  initialized = Initialize();
+}
+
+bool Model::Initialize() {
+  if (!BuildPreprocessPipelineFromConfig()) {
+    FDERROR << "Failed to build preprocess pipeline from configuration file."
+            << std::endl;
+    return false;
+  }
+  if (!InitRuntime()) {
+    FDERROR << "Failed to initialize fastdeploy backend." << std::endl;
+    return false;
+  }
+  return true;
+}
+
+bool Model::BuildPreprocessPipelineFromConfig() {
+  processors_.clear();
+  YAML::Node cfg;
+  try {
+    cfg = YAML::LoadFile(config_file_);
+  } catch (YAML::BadFile& e) {
+    FDERROR << "Failed to load yaml file " << config_file_
+            << ", maybe you should check this file." << std::endl;
+    return false;
+  }
+  auto preprocess_cfg = cfg["PreProcess"]["transform_ops"];
+  processors_.push_back(std::make_shared<BGR2RGB>());
+  for (const auto& op : preprocess_cfg) {
+    FDASSERT(op.IsMap(),
+             "Require the transform information in yaml be Map type.");
+    auto op_name = op.begin()->first.as<std::string>();
+    if (op_name == "ResizeImage") {
+      int target_size = op.begin()->second["resize_short"].as<int>();
+      bool use_scale = false;
+      int interp = 1;
+      processors_.push_back(
+          std::make_shared<ResizeByShort>(target_size, 1, use_scale));
+    } else if (op_name == "CropImage") {
+      int width = op.begin()->second["size"].as<int>();
+      int height = op.begin()->second["size"].as<int>();
+      processors_.push_back(std::make_shared<CenterCrop>(width, height));
+    } else if (op_name == "NormalizeImage") {
+      auto mean = op.begin()->second["mean"].as<std::vector<float>>();
+      auto std = op.begin()->second["std"].as<std::vector<float>>();
+      auto scale = op.begin()->second["scale"].as<float>();
+      FDASSERT((scale - 0.00392157) < 1e-06 && (scale - 0.00392157) > -1e-06,
+               "Only support scale in Normalize to be 0.00392157, which means "
+               "the pixel values are in the range [0, 255].");
+      processors_.push_back(std::make_shared<Normalize>(mean, std));
+    } else if (op_name == "ToCHWImage") {
+      processors_.push_back(std::make_shared<HWC2CHW>());
+    } else {
+      FDERROR << "Unexpected preprocess operator: " << op_name << "."
+              << std::endl;
+      return false;
+    }
+  }
+  return true;
+}
+
+bool Model::Preprocess(Mat* mat, FDTensor* output) {
+  for (size_t i = 0; i < processors_.size(); ++i) {
+    if (!(*(processors_[i].get()))(mat)) {
+      FDERROR << "Failed to process image data in " << processors_[i]->Name()
+              << "." << std::endl;
+      return false;
+    }
+  }
+
+  int channel = mat->Channels();
+  int width = mat->Width();
+  int height = mat->Height();
+  output->name = InputInfoOfRuntime(0).name;
+  output->SetExternalData({1, channel, height, width}, FDDataType::FP32,
+                          mat->GetCpuMat()->ptr());
+  return true;
+}
+
+bool Model::Postprocess(const FDTensor& infer_result, ClassifyResult* result,
+                        int topk) {
+  int num_classes = infer_result.shape[1];
+  const float* infer_result_buffer =
+      reinterpret_cast<const float*>(infer_result.data.data());
+  topk = std::min(num_classes, topk);
+  result->label_ids =
+      utils::TopKIndices(infer_result_buffer, num_classes, topk);
+  result->scores.resize(topk);
+  for (int i = 0; i < topk; ++i) {
+    result->scores[i] = *(infer_result_buffer + result->label_ids[i]);
+  }
+  return true;
+}
+
+bool Model::Predict(cv::Mat* im, ClassifyResult* result, int topk) {
+  Mat mat(*im);
+  std::vector<FDTensor> processed_data(1);
+  if (!Preprocess(&mat, &(processed_data[0]))) {
+    FDERROR << "Failed to preprocess input data while using model:"
+            << ModelName() << "." << std::endl;
+    return false;
+  }
+
+  std::vector<FDTensor> infer_result(1);
+  if (!Infer(processed_data, &infer_result)) {
+    FDERROR << "Failed to inference while using model:" << ModelName() << "."
+            << std::endl;
+    return false;
+  }
+
+  if (!Postprocess(infer_result[0], result, topk)) {
+    FDERROR << "Failed to postprocess while using model:" << ModelName() << "."
+            << std::endl;
+    return false;
+  }
+  return true;
+}
+
+} // namespace ppcls
+} // namespace vision
+} // namespace fastdeploy
diff --git a/fastdeploy/vision/ppcls/model.h b/fastdeploy/vision/ppcls/model.h
new file mode 100644
index 0000000000..f649ca1977
--- /dev/null
+++ b/fastdeploy/vision/ppcls/model.h
@@ -0,0 +1,37 @@
+#pragma once
+#include "fastdeploy/fastdeploy_model.h"
+#include "fastdeploy/vision/common/result.h"
+#include "fastdeploy/vision/common/processors/transform.h"
+
+namespace fastdeploy {
+namespace vision {
+namespace ppcls {
+
+class FASTDEPLOY_DECL Model : public FastDeployModel {
+ public:
+  Model(const std::string& model_file, const std::string& params_file,
+        const std::string& config_file,
+        const RuntimeOption& custom_option = RuntimeOption(),
+        const Frontend& model_format = Frontend::PADDLE);
+
+  std::string ModelName() const { return "ppclas-classify"; }
+
+  bool Initialize();
+
+  bool BuildPreprocessPipelineFromConfig();
+
+  bool Preprocess(Mat* mat, FDTensor* outputs);
+
+  bool Postprocess(const FDTensor& infer_result, ClassifyResult* result,
+                   int topk = 1);
+
+  // TODO(jiangjiajun) Batch inference is on the way
+  virtual bool Predict(cv::Mat* im, ClassifyResult* result, int topk = 1);
+
+ private:
+  std::vector<std::shared_ptr<Processor>> processors_;
+  std::string config_file_;
+};
+} // namespace ppcls
+} // namespace vision
+} // namespace fastdeploy
diff --git a/fastdeploy/vision/ppcls/ppcls_pybind.cc b/fastdeploy/vision/ppcls/ppcls_pybind.cc
new file mode 100644
index 0000000000..ef3fffee8e
--- /dev/null
+++ b/fastdeploy/vision/ppcls/ppcls_pybind.cc
@@ -0,0 +1,30 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "fastdeploy/pybind/main.h"
+
+namespace fastdeploy {
+void BindPpClsModel(pybind11::module& m) {
+  auto ppcls_module = m.def_submodule("ppcls", "Module to deploy PaddleClas.");
+  pybind11::class_<vision::ppcls::Model>(ppcls_module, "Model")
+      .def(pybind11::init<std::string, std::string, std::string, RuntimeOption,
+                          Frontend>())
+      .def("predict",
+           [](vision::ppcls::Model& self, pybind11::array& data, int topk = 1) {
+             auto mat = PyArrayToCvMat(data);
+             vision::ClassifyResult res;
+             self.Predict(&mat, &res, topk);
+             return res;
+           });
+}
+} // namespace fastdeploy
diff --git a/fastdeploy/vision/ultralytics/__init__.py b/fastdeploy/vision/ultralytics/__init__.py
new file mode 100644
index 0000000000..f7ea91f60d
--- /dev/null
+++ b/fastdeploy/vision/ultralytics/__init__.py
@@ -0,0 +1,102 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+import logging
+import fastdeploy as fd
+from fastdeploy import C
+
+
+class YOLOv5(fd.FastDeployModel):
+    def __init__(self,
+                 model_file,
+                 params_file=None,
+                 backend_option=None,
+                 model_format=fd.Frontend.ONNX):
+        # Call the base class to initialize backend_option;
+        # the initialized option is stored in self._runtime_option
+        super(YOLOv5, self).__init__(backend_option)
+
+        if model_format == fd.Frontend.ONNX:
+            # The loaded model must be stored in the member self._model
+            self._model = C.vision.ultralytics.YOLOv5(
+                model_file, self._runtime_option, model_format)
+        elif model_format == fd.Frontend.PADDLE:
+            self._model = C.vision.ultralytics.YOLOv5(
+                model_file, params_file, self._runtime_option, model_format)
+        # self.initialized indicates whether the model initialized successfully
+        assert self.initialized, "YOLOv5 initialize failed."
+
+    def predict(self, input_image, conf_threshold=0.25, nms_iou_threshold=0.5):
+        return self._model.predict(input_image, conf_threshold,
+                                   nms_iou_threshold)
+
+    # Property wrappers for YOLOv5 model attributes.
+    # Most of them relate to preprocessing; for example, setting
+    # model.size = [1280, 1280] changes the resize shape used during
+    # preprocessing (provided the model supports it).
+    @property
+    def size(self):
+        return self._model.size
+
+    @property
+    def padding_value(self):
+        return self._model.padding_value
+
+    @property
+    def is_no_pad(self):
+        return self._model.is_no_pad
+
+    @property
+    def is_scale_up(self):
+        return self._model.is_scale_up
+
+    @property
+    def stride(self):
+        return self._model.stride
+
+    @size.setter
+    def size(self, wh):
+        assert isinstance(wh, (
+            list, tuple
+        )), "The value to set `size` must be type of tuple or list."
+        assert len(
+            wh
+        ) == 2, "The value to set `size` must contain 2 elements meaning [width, height], but now it contains {} elements.".format(
+            len(wh))
+        self._model.size = wh
+
+    @padding_value.setter
+    def padding_value(self, value):
+        assert isinstance(
+            value,
+            list), "The value to set `padding_value` must be type of list."
+        self._model.padding_value = value
+
+    @is_no_pad.setter
+    def is_no_pad(self, value):
+        assert isinstance(
+            value, bool), "The value to set `is_no_pad` must be type of bool."
+        self._model.is_no_pad = value
+
+    @is_scale_up.setter
+    def is_scale_up(self, value):
+        assert isinstance(
+            value,
+            bool), "The value to set `is_scale_up` must be type of bool."
+        self._model.is_scale_up = value
+
+    @stride.setter
+    def stride(self, value):
+        assert isinstance(
+            value, int), "The value to set `stride` must be type of int."
+        self._model.stride = value
diff --git a/fastdeploy/vision/ultralytics/ultralytics_pybind.cc b/fastdeploy/vision/ultralytics/ultralytics_pybind.cc
new file mode 100644
index 0000000000..abca7e488e
--- /dev/null
+++ b/fastdeploy/vision/ultralytics/ultralytics_pybind.cc
@@ -0,0 +1,40 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/pybind/main.h"
+
+namespace fastdeploy {
+void BindUltralytics(pybind11::module& m) {
+  auto ultralytics_module =
+      m.def_submodule("ultralytics", "https://github.com/ultralytics/yolov5");
+  pybind11::class_<vision::ultralytics::YOLOv5>(
+      ultralytics_module, "YOLOv5")
+      .def(pybind11::init<std::string, RuntimeOption, Frontend>())
+      .def(pybind11::init<std::string, std::string, RuntimeOption, Frontend>())
+      .def("predict",
+           [](vision::ultralytics::YOLOv5& self, pybind11::array& data,
+              float conf_threshold, float nms_iou_threshold) {
+             auto mat = PyArrayToCvMat(data);
+             vision::DetectionResult res;
+             self.Predict(&mat, &res, conf_threshold, nms_iou_threshold);
+             return res;
+           })
+      .def_readwrite("size", &vision::ultralytics::YOLOv5::size)
+      .def_readwrite("padding_value",
+                     &vision::ultralytics::YOLOv5::padding_value)
+      .def_readwrite("is_mini_pad", &vision::ultralytics::YOLOv5::is_mini_pad)
+      .def_readwrite("is_no_pad", &vision::ultralytics::YOLOv5::is_no_pad)
+      .def_readwrite("is_scale_up", &vision::ultralytics::YOLOv5::is_scale_up)
+      .def_readwrite("stride", &vision::ultralytics::YOLOv5::stride);
+}
+} // namespace fastdeploy
diff --git a/fastdeploy/vision/ultralytics/yolov5.cc b/fastdeploy/vision/ultralytics/yolov5.cc
new file mode 100644
index 0000000000..131396576f
--- /dev/null
+++ b/fastdeploy/vision/ultralytics/yolov5.cc
@@ -0,0 +1,222 @@
+#include "fastdeploy/vision/ultralytics/yolov5.h"
+#include "fastdeploy/utils/perf.h"
+#include "fastdeploy/vision/utils/utils.h"
+
+namespace fastdeploy {
+namespace vision {
+namespace ultralytics {
+
+void LetterBox(Mat* mat, std::vector<int> size, std::vector<float> color,
+               bool _auto, bool scale_fill = false, bool scale_up = true,
+               int stride = 32) {
+  float scale =
+      std::min(size[1] * 1.0 / mat->Height(), size[0] * 1.0 / mat->Width());
+  if (!scale_up) {
+    scale = std::min(scale, 1.0f);
+  }
+
+  int resize_h = int(round(mat->Height() * scale));
+  int resize_w = int(round(mat->Width() * scale));
+
+  int pad_w = size[0] - resize_w;
+  int pad_h = size[1] - resize_h;
+  if (_auto) {
+    pad_h = pad_h % stride;
+    pad_w = pad_w % stride;
+  } else if (scale_fill) {
+    pad_h = 0;
+    pad_w = 0;
+    resize_h = size[1];
+    resize_w = size[0];
+  }
+  Resize::Run(mat, resize_w, resize_h);
+  if (pad_h > 0 || pad_w > 0) {
+    float half_h = pad_h * 1.0 / 2;
+    int top = int(round(half_h - 0.1));
+    int bottom = int(round(half_h + 0.1));
+    float half_w = pad_w * 1.0 / 2;
+    int left = int(round(half_w - 0.1));
+    int right = int(round(half_w + 0.1));
+    Pad::Run(mat, top, bottom, left, right, color);
+  }
+}
+
+YOLOv5::YOLOv5(const std::string& model_file,
+               const RuntimeOption& custom_option,
+               const Frontend& model_format) {
+  valid_cpu_backends = {Backend::ORT};  // specify the usable CPU backends
+  valid_gpu_backends = {Backend::ORT};  // specify the usable GPU backends
+  runtime_option = custom_option;
+  runtime_option.model_format = model_format;  // specify the model format
+  runtime_option.model_file = model_file;
+  // `initialized` marks whether the model initialized successfully;
+  // call YOLOv5.Initialized() in C++ or YOLOv5.initialized in Python
+  // to check it
+  initialized = Initialize();
+}
+
+YOLOv5::YOLOv5(const std::string& model_file, const std::string& params_file,
+               const RuntimeOption& custom_option,
+               const Frontend& model_format) {
+  valid_cpu_backends = {Backend::PDRT};  // specify the usable CPU backends
+  valid_gpu_backends = {Backend::PDRT};  // specify the usable GPU backends
+  runtime_option = custom_option;
+  runtime_option.model_format = model_format;
+  runtime_option.model_file = model_file;
+  runtime_option.params_file = params_file;
+  initialized = Initialize();
+}
+
+bool YOLOv5::Initialize() {
+  // parameters for preprocess
+  size = {640, 640};
+  padding_value = {114.0, 114.0, 114.0};
+  is_mini_pad = false;
+  is_no_pad = false;
+  is_scale_up = true;
+  stride = 32;
+
+  if (!InitRuntime()) {
+    FDERROR << "Failed to initialize fastdeploy backend." << std::endl;
+    return false;
+  }
+  return true;
+}
+
+bool YOLOv5::Preprocess(Mat* mat, FDTensor* output,
+                        std::map<std::string, std::array<float, 2>>* im_info) {
+  // yolov5's preprocess steps
+  // 1. letterbox
+  // 2. BGR->RGB
+  // 3. HWC->CHW
+  LetterBox(mat, size, padding_value, is_mini_pad, is_no_pad, is_scale_up,
+            stride);
+  BGR2RGB::Run(mat);
+  Normalize::Run(mat, std::vector<float>(mat->Channels(), 0.0),
+                 std::vector<float>(mat->Channels(), 1.0));
+
+  // Record output shape of preprocessed image
+  (*im_info)["output_shape"] = {static_cast<float>(mat->Height()),
+                                static_cast<float>(mat->Width())};
+
+  HWC2CHW::Run(mat);
+  Cast::Run(mat, "float");
+  mat->ShareWithTensor(output);
+  output->shape.insert(output->shape.begin(), 1);  // expand to {1, c, h, w}
+  return true;
+}
+
+bool YOLOv5::Postprocess(
+    FDTensor& infer_result, DetectionResult* result,
+    const std::map<std::string, std::array<float, 2>>& im_info,
+    float conf_threshold, float nms_iou_threshold) {
+  FDASSERT(infer_result.shape[0] == 1, "Only support batch = 1 now.");
+  result->Clear();
+  result->Reserve(infer_result.shape[1]);
+  if (infer_result.dtype != FDDataType::FP32) {
+    FDERROR << "Only support post process with float32 data." << std::endl;
+    return false;
+  }
+  float* data = static_cast<float*>(infer_result.Data());
+  for (size_t i = 0; i < infer_result.shape[1]; ++i) {
+    int s = i * infer_result.shape[2];
+    float confidence = data[s + 4];
+    float* max_class_score =
+        std::max_element(data + s + 5, data + s + infer_result.shape[2]);
+    confidence *= (*max_class_score);
+    // filter boxes by conf_threshold
+    if (confidence <= conf_threshold) {
+      continue;
+    }
+    // convert from [x, y, w, h] to [x1, y1, x2, y2]
+    result->boxes.emplace_back(std::array<float, 4>{
+        data[s] - data[s + 2] / 2, data[s + 1] - data[s + 3] / 2,
+        data[s + 0] + data[s + 2] / 2, data[s + 1] + data[s + 3] / 2});
+    result->label_ids.push_back(std::distance(data + s + 5, max_class_score));
+    result->scores.push_back(confidence);
+  }
+  utils::NMS(result, nms_iou_threshold);
+
+  // scale the boxes to the origin image shape
+  auto iter_out = im_info.find("output_shape");
+  auto iter_ipt = im_info.find("input_shape");
+  FDASSERT(iter_out != im_info.end() && iter_ipt != im_info.end(),
+           "Cannot find input_shape or output_shape from im_info.");
+  float out_h = iter_out->second[0];
+  float out_w = iter_out->second[1];
+  float ipt_h = iter_ipt->second[0];
+  float ipt_w = iter_ipt->second[1];
+  float scale = std::min(out_h / ipt_h, out_w / ipt_w);
+  for (size_t i = 0; i < result->boxes.size(); ++i) {
+    float pad_h = (out_h - ipt_h * scale) / 2;
+    float pad_w = (out_w - ipt_w * scale) / 2;
+
+    // clip box
+    result->boxes[i][0] = std::max((result->boxes[i][0] - pad_w) / scale, 0.0f);
+    result->boxes[i][1] = std::max((result->boxes[i][1] - pad_h) / scale, 0.0f);
+    result->boxes[i][2] = std::max((result->boxes[i][2] - pad_w) / scale, 0.0f);
+    result->boxes[i][3] = std::max((result->boxes[i][3] - pad_h) / scale, 0.0f);
+    result->boxes[i][0] = std::min(result->boxes[i][0], ipt_w);
+    result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h);
+    result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w);
+    result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h);
+  }
+  return true;
+}
+
+bool YOLOv5::Predict(cv::Mat* im, DetectionResult* result, float conf_threshold,
+                     float nms_iou_threshold) {
+#ifdef FASTDEPLOY_DEBUG
+  TIMERECORD_START(0)
+#endif
+
+  Mat mat(*im);
+  std::vector<FDTensor> input_tensors(1);
+
+  std::map<std::string, std::array<float, 2>> im_info;
+
+  // Record the shape of image and the shape of preprocessed image
+  im_info["input_shape"] = {static_cast<float>(mat.Height()),
+                            static_cast<float>(mat.Width())};
+  im_info["output_shape"] = {static_cast<float>(mat.Height()),
+                             static_cast<float>(mat.Width())};
+
+  if (!Preprocess(&mat, &input_tensors[0], &im_info)) {
+    FDERROR << "Failed to preprocess input image." << std::endl;
+    return false;
+  }
+
+#ifdef FASTDEPLOY_DEBUG
+  TIMERECORD_END(0, "Preprocess")
+  TIMERECORD_START(1)
+#endif
+
+  input_tensors[0].name = InputInfoOfRuntime(0).name;
+
+  std::vector<FDTensor> output_tensors;
+  if (!Infer(input_tensors, &output_tensors)) {
+    FDERROR << "Failed to inference." << std::endl;
+    return false;
+  }
+
+#ifdef FASTDEPLOY_DEBUG
+  TIMERECORD_END(1, "Inference")
+  TIMERECORD_START(2)
+#endif
+
+  if (!Postprocess(output_tensors[0], result, im_info, conf_threshold,
+                   nms_iou_threshold)) {
+    FDERROR << "Failed to post process." << std::endl;
+    return false;
+  }
+
+#ifdef FASTDEPLOY_DEBUG
+  TIMERECORD_END(2, "Postprocess")
+#endif
+  return true;
+}
+
+} // namespace ultralytics
+} // namespace vision
+} // namespace fastdeploy
diff --git a/fastdeploy/vision/ultralytics/yolov5.h b/fastdeploy/vision/ultralytics/yolov5.h
new file mode 100644
index 0000000000..711ceee9f5
--- /dev/null
+++ b/fastdeploy/vision/ultralytics/yolov5.h
@@ -0,0 +1,89 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include "fastdeploy/fastdeploy_model.h"
+#include "fastdeploy/vision/common/result.h"
+#include "fastdeploy/vision/common/processors/transform.h"
+
+namespace fastdeploy {
+namespace vision {
+namespace ultralytics {
+
+class FASTDEPLOY_DECL YOLOv5 : public FastDeployModel {
+ public:
+  // Constructor that accepts models in ONNX format
+  YOLOv5(const std::string& model_file,
+         const RuntimeOption& option = RuntimeOption(),
+         const Frontend& model_format = Frontend::ONNX);
+
+  // Constructor that accepts models in Paddle format
+  // (e.g. after conversion with X2Paddle)
+  YOLOv5(const std::string& model_file, const std::string& params_file,
+         const RuntimeOption& custom_option = RuntimeOption(),
+         const Frontend& model_format = Frontend::PADDLE);
+
+  // Name of the model
+  virtual std::string ModelName() const { return "ultralytics/yolov5"; }
+
+  // Initialization, including backend setup and any other
+  // operations required before inference
+  virtual bool Initialize();
+
+  // Preprocessing of the input image
+  // Mat is the data structure defined by FastDeploy
+  // FDTensor is the preprocessed tensor handed to the backend for inference
+  // im_info stores data collected during preprocessing, needed by postprocess
+  virtual bool Preprocess(Mat* mat, FDTensor* outputs,
+                          std::map<std::string, std::array<float, 2>>* im_info);
+
+  // Postprocessing of the backend inference output, returned to the user
+  // infer_result is the output tensor produced by the backend
+  // result is the prediction result of the model
+  // im_info is the information recorded during preprocessing, used to
+  // restore the boxes to the original image
+  // conf_threshold is the confidence threshold used to filter boxes
+  // nms_iou_threshold is the IoU threshold used by NMS
+  virtual bool
+  Postprocess(FDTensor& infer_result, DetectionResult* result,
+              const std::map<std::string, std::array<float, 2>>& im_info,
+              float conf_threshold, float nms_iou_threshold);
+
+  // Prediction interface, i.e. the entry point called by users
+  // im is the user's input; for CV it is currently always a cv::Mat
+  // result is the output structure holding the prediction
+  // conf_threshold and nms_iou_threshold are postprocessing parameters
+  virtual bool Predict(cv::Mat* im, DetectionResult* result,
+                       float conf_threshold = 0.25,
+                       float nms_iou_threshold = 0.5);
+
+  // The following are parameters used at prediction time, mostly for
+  // pre/postprocessing. After creating the model, users may modify them
+  // according to the model's requirements and their own needs.
+  // tuple of (width, height)
+  std::vector<int> size;
+  // padding value, size should be the same as Channels
+  std::vector<float> padding_value;
+  // only pad to the minimum rectangle whose height and width are
+  // multiples of stride
+  bool is_mini_pad;
+  // when is_mini_pad = false and is_no_pad = true, the image will be
+  // resized to the set size
+  bool is_no_pad;
+  // if is_scale_up is false, the input image can only be scaled down:
+  // the maximum resize scale cannot exceed 1.0
+  bool is_scale_up;
+  // padding stride, for is_mini_pad
+  int stride;
+};
+} // namespace ultralytics
+} // namespace vision
+} // namespace fastdeploy
diff --git a/fastdeploy/vision/utils/nms.cc b/fastdeploy/vision/utils/nms.cc
new file mode 100644
index 0000000000..9f2c1c7bca
--- /dev/null
+++ b/fastdeploy/vision/utils/nms.cc
@@ -0,0 +1,72 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/utils/perf.h"
+#include "fastdeploy/vision/utils/utils.h"
+
+namespace fastdeploy {
+namespace vision {
+namespace utils {
+
+// The implementation refers to
+// https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.4/deploy/cpp/src/utils.cc
+void NMS(DetectionResult* result, float iou_threshold) {
+  result->Sort();
+
+  std::vector<float> area_of_boxes(result->boxes.size());
+  std::vector<int> suppressed(result->boxes.size(), 0);
+  for (size_t i = 0; i < result->boxes.size(); ++i) {
+    area_of_boxes[i] = (result->boxes[i][2] - result->boxes[i][0] + 1) *
+                       (result->boxes[i][3] - result->boxes[i][1] + 1);
+  }
+
+  for (size_t i = 0; i < result->boxes.size(); ++i) {
+    if (suppressed[i] == 1) {
+      continue;
+    }
+    for (size_t j = i + 1; j < result->boxes.size(); ++j) {
+      if (suppressed[j] == 1) {
+        continue;
+      }
+      float xmin = std::max(result->boxes[i][0], result->boxes[j][0]);
+      float ymin = std::max(result->boxes[i][1], result->boxes[j][1]);
+      float xmax = std::min(result->boxes[i][2], result->boxes[j][2]);
+      float ymax = std::min(result->boxes[i][3], result->boxes[j][3]);
+      float overlap_w = std::max(0.0f, xmax - xmin + 1);
+      float overlap_h = std::max(0.0f, ymax - ymin + 1);
+      float overlap_area = overlap_w * overlap_h;
+      float overlap_ratio =
+          overlap_area /
+          (area_of_boxes[i] + area_of_boxes[j] - overlap_area + 1e-06);
+      if (overlap_ratio > iou_threshold) {
+        suppressed[j] = 1;
+      }
+    }
+  }
+  DetectionResult backup(*result);
+  result->Clear();
+  result->Reserve(suppressed.size());
+  for (size_t i = 0; i < suppressed.size(); ++i) {
+    if (suppressed[i] == 1) {
+      continue;
+    }
+    result->boxes.emplace_back(backup.boxes[i]);
+    result->scores.push_back(backup.scores[i]);
+    result->label_ids.push_back(backup.label_ids[i]);
+  }
+}
+
+} // namespace utils
+} // namespace vision
+} // namespace fastdeploy
diff --git a/fastdeploy/vision/utils/utils.h b/fastdeploy/vision/utils/utils.h
new file mode 100644
index 0000000000..3e333297c7
--- /dev/null
+++ b/fastdeploy/vision/utils/utils.h
@@ -0,0 +1,58 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
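+
+// NMS (declared below, implemented in nms.cc) keeps the highest-scoring box
+// and suppresses any remaining box whose IoU with it exceeds iou_threshold.
+// Worked example (illustrative numbers): boxes [0, 0, 10, 10] and
+// [5, 0, 15, 10] overlap in [5, 0, 10, 10]; with the +1 pixel convention
+// used in nms.cc, the overlap area is 6 * 11 = 66 and each box area is
+// 11 * 11 = 121, so IoU = 66 / (121 + 121 - 66) ~= 0.375, which is
+// suppressed at iou_threshold = 0.3 but kept at the default 0.5.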
+
+#pragma once
+
+#include "fastdeploy/core/fd_tensor.h"
+#include "fastdeploy/utils/utils.h"
+#include "fastdeploy/vision/common/result.h"
+#include <set>
+#include <vector>
+
+namespace fastdeploy {
+namespace vision {
+namespace utils {
+// topk is usually a very small value, so this simple O(topk * n)
+// implementation should not cost too much time.
+// Note: it assumes the minimum possible value is -99999999, so do not use
+// this function on arrays that contain values less than -99999999.
+template <typename T>
+std::vector<int32_t> TopKIndices(const T* array, int array_size, int topk) {
+  topk = std::min(array_size, topk);
+  std::vector<int32_t> res(topk);
+  std::set<int32_t> searched;
+  for (int32_t i = 0; i < topk; ++i) {
+    T max_value = -99999999;  // assumed lower bound, see the note above
+    for (int32_t j = 0; j < array_size; ++j) {
+      if (searched.find(j) != searched.end()) {
+        continue;
+      }
+      if (*(array + j) > max_value) {
+        res[i] = j;
+        max_value = *(array + j);
+      }
+    }
+    searched.insert(res[i]);
+  }
+  return res;
+}
+
+void NMS(DetectionResult* output, float iou_threshold = 0.5);
+
+} // namespace utils
+} // namespace vision
+} // namespace fastdeploy
diff --git a/fastdeploy/vision/vision_pybind.cc b/fastdeploy/vision/vision_pybind.cc
new file mode 100644
index 0000000000..f3c3f0052d
--- /dev/null
+++ b/fastdeploy/vision/vision_pybind.cc
@@ -0,0 +1,45 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/pybind/main.h"
+
+namespace fastdeploy {
+
+void BindPpClsModel(pybind11::module& m);
+void BindUltralytics(pybind11::module& m);
+#ifdef ENABLE_VISION_VISUALIZE
+void BindVisualize(pybind11::module& m);
+#endif
+
+void BindVision(pybind11::module& m) {
+  pybind11::class_<vision::ClassifyResult>(m, "ClassifyResult")
+      .def(pybind11::init<>())
+      .def_readwrite("label_ids", &vision::ClassifyResult::label_ids)
+      .def_readwrite("scores", &vision::ClassifyResult::scores)
+      .def("__repr__", &vision::ClassifyResult::Str)
+      .def("__str__", &vision::ClassifyResult::Str);
+
+  pybind11::class_<vision::DetectionResult>(m, "DetectionResult")
+      .def(pybind11::init<>())
+      .def_readwrite("boxes", &vision::DetectionResult::boxes)
+      .def_readwrite("scores", &vision::DetectionResult::scores)
+      .def_readwrite("label_ids", &vision::DetectionResult::label_ids)
+      .def("__repr__", &vision::DetectionResult::Str)
+      .def("__str__", &vision::DetectionResult::Str);
+
+  BindPpClsModel(m);
+  BindUltralytics(m);
+#ifdef ENABLE_VISION_VISUALIZE
+  BindVisualize(m);
+#endif
+}
+} // namespace fastdeploy
diff --git a/fastdeploy/vision/visualize/__init__.py b/fastdeploy/vision/visualize/__init__.py
new file mode 100644
index 0000000000..6ed99faa63
--- /dev/null
+++ b/fastdeploy/vision/visualize/__init__.py
@@ -0,0 +1,22 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+import logging
+import fastdeploy as fd
+from fastdeploy import C
+
+
+def vis_detection(im_data, det_result, line_size=1, font_size=0.5):
+    C.vision.Visualize.vis_detection(im_data, det_result, line_size, font_size)
diff --git a/fastdeploy/vision/visualize/detection.cc b/fastdeploy/vision/visualize/detection.cc
new file mode 100644
index 0000000000..d0c4116148
--- /dev/null
+++ b/fastdeploy/vision/visualize/detection.cc
@@ -0,0 +1,61 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifdef ENABLE_VISION_VISUALIZE
+
+#include "fastdeploy/vision/visualize/visualize.h"
+#include "opencv2/imgproc/imgproc.hpp"
+
+namespace fastdeploy {
+namespace vision {
+
+// By default only num_classes <= 1000 is supported for visualization.
+// If you need to visualize num_classes > 1000,
+// please call Visualize::GetColorMap(num_classes) first.
+void Visualize::VisDetection(cv::Mat* im, const DetectionResult& result,
+                             int line_size, float font_size) {
+  auto color_map = GetColorMap();
+  int h = im->rows;
+  int w = im->cols;
+  for (size_t i = 0; i < result.boxes.size(); ++i) {
+    cv::Rect rect(result.boxes[i][0], result.boxes[i][1],
+                  result.boxes[i][2] - result.boxes[i][0],
+                  result.boxes[i][3] - result.boxes[i][1]);
+    int c0 = color_map[3 * result.label_ids[i] + 0];
+    int c1 = color_map[3 * result.label_ids[i] + 1];
+    int c2 = color_map[3 * result.label_ids[i] + 2];
+    cv::Scalar rect_color = cv::Scalar(c0, c1, c2);
+    std::string id = std::to_string(result.label_ids[i]);
+    std::string score = std::to_string(result.scores[i]);
+    if (score.size() > 4) {
+      score = score.substr(0, 4);
+    }
+    std::string text = id + "," + score;
+    int font = cv::FONT_HERSHEY_SIMPLEX;
+    cv::Size text_size = cv::getTextSize(text, font, font_size, 1, nullptr);
+    cv::Point origin;
+    origin.x = rect.x;
+    origin.y = rect.y;
+    cv::Rect text_background =
+        cv::Rect(result.boxes[i][0], result.boxes[i][1] - text_size.height,
+                 text_size.width, text_size.height);
+    cv::rectangle(*im, rect, rect_color, line_size);
+    // fill a background patch so the white label text stays readable
+    cv::rectangle(*im, text_background, rect_color, -1);
+    cv::putText(*im, text, origin, font, font_size, cv::Scalar(255, 255, 255),
+                1);
+  }
+}
+
+} // namespace vision
+} // namespace fastdeploy
+#endif
diff --git a/fastdeploy/vision/visualize/visualize.cc b/fastdeploy/vision/visualize/visualize.cc
new file mode 100644
index 0000000000..4ad6ba1247
--- /dev/null
+++ b/fastdeploy/vision/visualize/visualize.cc
@@ -0,0 +1,47 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifdef ENABLE_VISION_VISUALIZE
+#include "fastdeploy/vision/visualize/visualize.h"
+
+namespace fastdeploy {
+namespace vision {
+
+int Visualize::num_classes_ = 0;
+std::vector<int> Visualize::color_map_ = std::vector<int>();
+
+const std::vector<int>& Visualize::GetColorMap(int num_classes) {
+  if (num_classes < num_classes_) {
+    return color_map_;
+  }
+  num_classes_ = num_classes;
+  std::vector<int>().swap(color_map_);
+  color_map_.resize(3 * num_classes_, 0);
+  for (int i = 0; i < num_classes_; ++i) {
+    int j = 0;
+    int lab = i;
+    while (lab) {
+      color_map_[i * 3] |= (((lab >> 0) & 1) << (7 - j));
+      color_map_[i * 3 + 1] |= (((lab >> 1) & 1) << (7 - j));
+      color_map_[i * 3 + 2] |= (((lab >> 2) & 1) << (7 - j));
+      ++j;
+      lab >>= 3;
+    }
+  }
+  return color_map_;
+}
+
+} // namespace vision
+} // namespace fastdeploy
+#endif
diff --git a/fastdeploy/vision/visualize/visualize.h b/fastdeploy/vision/visualize/visualize.h
new file mode 100644
index 0000000000..6fffa521a6
--- /dev/null
+++ b/fastdeploy/vision/visualize/visualize.h
@@ -0,0 +1,34 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifdef ENABLE_VISION_VISUALIZE
+#pragma once
+
+#include "fastdeploy/vision/common/result.h"
+#include "opencv2/imgproc/imgproc.hpp"
+namespace fastdeploy {
+namespace vision {
+
+class FASTDEPLOY_DECL Visualize {
+ public:
+  static int num_classes_;
+  static std::vector<int> color_map_;
+  static const std::vector<int>& GetColorMap(int num_classes = 1000);
+  static void VisDetection(cv::Mat* im, const DetectionResult& result,
+                           int line_size = 2, float font_size = 0.5f);
+};
+
+} // namespace vision
+} // namespace fastdeploy
+#endif
diff --git a/fastdeploy/vision/visualize/visualize_pybind.cc b/fastdeploy/vision/visualize/visualize_pybind.cc
new file mode 100644
index 0000000000..66ffc74f9f
--- /dev/null
+++ b/fastdeploy/vision/visualize/visualize_pybind.cc
@@ -0,0 +1,28 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/pybind/main.h"
+
+namespace fastdeploy {
+void BindVisualize(pybind11::module& m) {
+  pybind11::class_<vision::Visualize>(m, "Visualize")
+      .def(pybind11::init<>())
+      .def_static("vis_detection", [](pybind11::array& im_data,
+                                      vision::DetectionResult& result,
+                                      int line_size, float font_size) {
+        auto im = PyArrayToCvMat(im_data);
+        vision::Visualize::VisDetection(&im, result, line_size, font_size);
+      });
+}
+}  // namespace fastdeploy
diff --git a/requirements.txt b/requirements.txt
index fa4efdf96c..0dd006bbc3 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1 @@
-tqdm
-six
\ No newline at end of file
+opencv-python
diff --git a/setup.py b/setup.py
index 8462b629a5..eb17fbc9c0 100644
--- a/setup.py
+++ b/setup.py
@@ -1,34 +1,371 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# This file referred to github.com/onnx/onnx.git
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+from distutils.spawn import find_executable
+from distutils import sysconfig, log
 import setuptools
-import fastdeploy
-import io
+import setuptools.command.build_py
+import setuptools.command.develop
+import setuptools.command.build_ext
+
+from collections import namedtuple
+from contextlib import contextmanager
+import glob
 import os
+import shlex
+import subprocess
+import sys
+import platform
+from textwrap import dedent
+import multiprocessing
 
 with open("requirements.txt") as fin:
     REQUIRED_PACKAGES = fin.read()
 
-def read(*names, **kwargs):
-    with io.open(
-            os.path.join(os.path.dirname(__file__), *names),
-            encoding=kwargs.get("encoding", "utf8")) as fp:
-        return fp.read()
+PACKAGE_NAME = "fastdeploy"
 
+setup_configs = dict()
+setup_configs["ENABLE_PADDLE_FRONTEND"] = os.getenv("ENABLE_PADDLE_FRONTEND",
+                                                    "ON")
+setup_configs["ENABLE_ORT_BACKEND"] = os.getenv("ENABLE_ORT_BACKEND", "ON")
+setup_configs["BUILD_DEMO"] = os.getenv("BUILD_DEMO", "ON")
+setup_configs["ENABLE_VISION"] = os.getenv("ENABLE_VISION", "ON")
+setup_configs["ENABLE_TRT_BACKEND"] = os.getenv("ENABLE_TRT_BACKEND", "OFF")
+setup_configs["WITH_GPU"] = os.getenv("WITH_GPU", "OFF")
+setup_configs["TRT_DIRECTORY"] = os.getenv("TRT_DIRECTORY", "UNDEFINED")
+setup_configs["CUDA_DIRECTORY"] = os.getenv("CUDA_DIRECTORY", "UNDEFINED")
+
+TOP_DIR = os.path.realpath(os.path.dirname(__file__))
+SRC_DIR = os.path.join(TOP_DIR, "fastdeploy")
+CMAKE_BUILD_DIR = os.path.join(TOP_DIR, '.setuptools-cmake-build')
+
+WINDOWS = (os.name == 'nt')
+
+CMAKE = find_executable('cmake3') or find_executable('cmake')
+MAKE = find_executable('make')
+
+setup_requires = []
+extras_require = {}
+
+################################################################################
+# Global variables for controlling the build variant
+################################################################################ + +# Default value is set to TRUE\1 to keep the settings same as the current ones. +# However going forward the recomemded way to is to set this to False\0 +USE_MSVC_STATIC_RUNTIME = bool( + os.getenv('USE_MSVC_STATIC_RUNTIME', '1') == '1') +ONNX_NAMESPACE = os.getenv('ONNX_NAMESPACE', 'paddle2onnx') +################################################################################ +# Version +################################################################################ + +try: + git_version = subprocess.check_output( + ['git', 'rev-parse', 'HEAD'], cwd=TOP_DIR).decode('ascii').strip() +except (OSError, subprocess.CalledProcessError): + git_version = None + +with open(os.path.join(TOP_DIR, 'VERSION_NUMBER')) as version_file: + VersionInfo = namedtuple('VersionInfo', ['version', 'git_version'])( + version=version_file.read().strip(), git_version=git_version) + +################################################################################ +# Pre Check +################################################################################ + +assert CMAKE, 'Could not find "cmake" executable!' + +################################################################################ +# Utilities +################################################################################ + + +@contextmanager +def cd(path): + if not os.path.isabs(path): + raise RuntimeError('Can only cd to absolute path, got: {}'.format( + path)) + orig_path = os.getcwd() + os.chdir(path) + try: + yield + finally: + os.chdir(orig_path) + + +################################################################################ +# Customized commands +################################################################################ + + +class ONNXCommand(setuptools.Command): + user_options = [] + + def initialize_options(self): + pass + + def finalize_options(self): + pass + + +class create_version(ONNXCommand): + def run(self): + with open(os.path.join(SRC_DIR, 'version.py'), 'w') as f: + f.write( + dedent('''\ + # This file is generated by setup.py. DO NOT EDIT! + from __future__ import absolute_import + from __future__ import division + from __future__ import print_function + from __future__ import unicode_literals + version = '{version}' + git_version = '{git_version}' + '''.format(**dict(VersionInfo._asdict())))) + + +class cmake_build(setuptools.Command): + """ + Compiles everything when `python setupmnm.py build` is run using cmake. + Custom args can be passed to cmake by specifying the `CMAKE_ARGS` + environment variable. + The number of CPUs used by `make` can be specified by passing `-j` + to `setup.py build`. By default all CPUs are used. 
+ """ + user_options = [(str('jobs='), str('j'), + str('Specifies the number of jobs to use with make'))] + + built = False + + def initialize_options(self): + self.jobs = None + + def finalize_options(self): + if sys.version_info[0] >= 3: + self.set_undefined_options('build', ('parallel', 'jobs')) + if self.jobs is None and os.getenv("MAX_JOBS") is not None: + self.jobs = os.getenv("MAX_JOBS") + self.jobs = multiprocessing.cpu_count() if self.jobs is None else int( + self.jobs) + + def run(self): + if cmake_build.built: + return + cmake_build.built = True + if not os.path.exists(CMAKE_BUILD_DIR): + os.makedirs(CMAKE_BUILD_DIR) + + with cd(CMAKE_BUILD_DIR): + build_type = 'Release' + # configure + cmake_args = [ + CMAKE, + '-DPYTHON_INCLUDE_DIR={}'.format(sysconfig.get_python_inc()), + '-DPYTHON_EXECUTABLE={}'.format(sys.executable), + '-DBUILD_FASTDEPLOY_PYTHON=ON', + '-DCMAKE_EXPORT_COMPILE_COMMANDS=ON', + '-DONNX_NAMESPACE={}'.format(ONNX_NAMESPACE), + '-DPY_EXT_SUFFIX={}'.format( + sysconfig.get_config_var('EXT_SUFFIX') or ''), + ] + cmake_args.append('-DCMAKE_BUILD_TYPE=%s' % build_type) + for k, v in setup_configs.items(): + cmake_args.append("-D{}={}".format(k, v)) + if WINDOWS: + cmake_args.extend([ + # we need to link with libpython on windows, so + # passing python version to window in order to + # find python in cmake + '-DPY_VERSION={}'.format('{0}.{1}'.format(* \ + sys.version_info[:2])), + ]) + if platform.architecture()[0] == '64bit': + cmake_args.extend(['-A', 'x64', '-T', 'host=x64']) + else: + cmake_args.extend(['-A', 'Win32', '-T', 'host=x86']) + if 'CMAKE_ARGS' in os.environ: + extra_cmake_args = shlex.split(os.environ['CMAKE_ARGS']) + # prevent crossfire with downstream scripts + del os.environ['CMAKE_ARGS'] + log.info('Extra cmake args: {}'.format(extra_cmake_args)) + cmake_args.extend(extra_cmake_args) + cmake_args.append(TOP_DIR) + subprocess.check_call(cmake_args) + + build_args = [CMAKE, '--build', os.curdir] + if WINDOWS: + build_args.extend(['--config', build_type]) + build_args.extend(['--', '/maxcpucount:{}'.format(self.jobs)]) + else: + build_args.extend(['--', '-j', str(self.jobs)]) + subprocess.check_call(build_args) + + +class build_py(setuptools.command.build_py.build_py): + def run(self): + self.run_command('create_version') + self.run_command('cmake_build') + + generated_python_files = \ + glob.glob(os.path.join(CMAKE_BUILD_DIR, 'fastdeploy', '*.py')) + \ + glob.glob(os.path.join(CMAKE_BUILD_DIR, 'fastdeploy', '*.pyi')) + + for src in generated_python_files: + dst = os.path.join(TOP_DIR, os.path.relpath(src, CMAKE_BUILD_DIR)) + self.copy_file(src, dst) + + return setuptools.command.build_py.build_py.run(self) + + +class develop(setuptools.command.develop.develop): + def run(self): + self.run_command('build_py') + setuptools.command.develop.develop.run(self) + + +class build_ext(setuptools.command.build_ext.build_ext): + def run(self): + self.run_command('cmake_build') + setuptools.command.build_ext.build_ext.run(self) + + def build_extensions(self): + for ext in self.extensions: + fullname = self.get_ext_fullname(ext.name) + filename = os.path.basename(self.get_ext_filename(fullname)) + + lib_path = CMAKE_BUILD_DIR + if os.name == 'nt': + debug_lib_dir = os.path.join(lib_path, "Debug") + release_lib_dir = os.path.join(lib_path, "Release") + if os.path.exists(debug_lib_dir): + lib_path = debug_lib_dir + elif os.path.exists(release_lib_dir): + lib_path = release_lib_dir + src = os.path.join(lib_path, filename) + dst = os.path.join( + 
os.path.realpath(self.build_lib), "fastdeploy", filename) + self.copy_file(src, dst) + + +class mypy_type_check(ONNXCommand): + description = 'Run MyPy type checker' + + def run(self): + """Run command.""" + onnx_script = os.path.realpath( + os.path.join( + os.path.dirname(os.path.abspath(__file__)), + "tools/mypy-onnx.py")) + returncode = subprocess.call([sys.executable, onnx_script]) + sys.exit(returncode) + + +cmdclass = { + 'create_version': create_version, + 'cmake_build': cmake_build, + 'build_py': build_py, + 'develop': develop, + 'build_ext': build_ext, + 'typecheck': mypy_type_check, +} + +################################################################################ +# Extensions +################################################################################ + +ext_modules = [ + setuptools.Extension( + name=str(PACKAGE_NAME + '.fastdeploy_main'), sources=[]), +] + +################################################################################ +# Packages +################################################################################ + +# no need to do fancy stuff so far +packages = setuptools.find_packages() + +################################################################################ +# Test +################################################################################ + +if sys.version_info[0] == 3: + # Mypy doesn't work with Python 2 + extras_require['mypy'] = ['mypy==0.600'] + +################################################################################ +# Final +################################################################################ + +package_data = {PACKAGE_NAME: ["LICENSE", "ThirdPartyNotices.txt"]} + +if sys.argv[1] == "install" or sys.argv[1] == "bdist_wheel": + if not os.path.exists(".setuptools-cmake-build"): + print("Please execute `python setup.py build` first.") + sys.exit(0) + import shutil + + shutil.copy("ThirdPartyNotices.txt", "fastdeploy") + shutil.copy("LICENSE", "fastdeploy") + depend_libs = list() + for f in os.listdir(".setuptools-cmake-build"): + if not os.path.isfile(os.path.join(".setuptools-cmake-build", f)): + continue + if f.count("libfastdeploy") > 0: + depend_libs.append(os.path.join(".setuptools-cmake-build", f)) + for dirname in os.listdir(".setuptools-cmake-build/third_libs/install"): + for lib in os.listdir( + os.path.join(".setuptools-cmake-build/third_libs/install", + dirname, "lib")): + if lib.count(".so") == 0 and lib.count( + ".dylib") == 0 and lib.count(".a") == 0: + continue + if not os.path.isfile( + os.path.join(".setuptools-cmake-build/third_libs/install", + dirname, "lib", lib)): + continue + shutil.copy( + os.path.join(".setuptools-cmake-build/third_libs/install", + dirname, "lib", lib), "fastdeploy/libs") + + all_libs = os.listdir("fastdeploy/libs") + for lib in all_libs: + package_data[PACKAGE_NAME].append(os.path.join("libs", lib)) setuptools.setup( - name="fastdeploy-python", - version=fastdeploy.__version__, - author="FastDeploy", - author_email="fastdeploy@baidu.com", - description="FastDeploy is a toolkit to deploy deeplearning models.", - long_description=read("README.md"), - long_description_content_type="text/markdown", - url="https://github.com/PaddlePaddle/FastDeploy", - packages=setuptools.find_packages(), + name=PACKAGE_NAME, + version=VersionInfo.version, + description="Deploy Kit Tool For Deeplearning models.", + ext_modules=ext_modules, + cmdclass=cmdclass, + packages=packages, + package_data=package_data, + include_package_data=True, + setup_requires=setup_requires, + 
extras_require=extras_require, + author='paddle-infer', + author_email='paddle-infer@baidu.com', + url='https://github.com/PaddlePaddle/Paddle2ONNX.git', install_requires=REQUIRED_PACKAGES, classifiers=[ "Programming Language :: Python :: 3", "License :: OSI Approved :: Apache Software License", "Operating System :: OS Independent", ], - license='Apache 2.0', - entry_points={ - 'console_scripts': ['fastdeploy=fastdeploy.__init__:main', ] - }) + license='Apache 2.0') diff --git a/third_party/pybind11/.appveyor.yml b/third_party/pybind11/.appveyor.yml new file mode 100644 index 0000000000..360760ac8d --- /dev/null +++ b/third_party/pybind11/.appveyor.yml @@ -0,0 +1,35 @@ +version: 1.0.{build} +image: +- Visual Studio 2017 +test: off +skip_branch_with_pr: true +build: + parallel: true +platform: +- x86 +environment: + matrix: + - PYTHON: 36 + CONFIG: Debug +install: +- ps: | + $env:CMAKE_GENERATOR = "Visual Studio 15 2017" + if ($env:PLATFORM -eq "x64") { $env:PYTHON = "$env:PYTHON-x64" } + $env:PATH = "C:\Python$env:PYTHON\;C:\Python$env:PYTHON\Scripts\;$env:PATH" + python -W ignore -m pip install --upgrade pip wheel + python -W ignore -m pip install pytest numpy --no-warn-script-location pytest-timeout +- ps: | + Start-FileDownload 'https://gitlab.com/libeigen/eigen/-/archive/3.3.7/eigen-3.3.7.zip' + 7z x eigen-3.3.7.zip -y > $null + $env:CMAKE_INCLUDE_PATH = "eigen-3.3.7;$env:CMAKE_INCLUDE_PATH" +build_script: +- cmake -G "%CMAKE_GENERATOR%" -A "%CMAKE_ARCH%" + -DCMAKE_CXX_STANDARD=14 + -DPYBIND11_WERROR=ON + -DDOWNLOAD_CATCH=ON + -DCMAKE_SUPPRESS_REGENERATION=1 + . +- set MSBuildLogger="C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" +- cmake --build . --config %CONFIG% --target pytest -- /m /v:m /logger:%MSBuildLogger% +- cmake --build . --config %CONFIG% --target cpptest -- /m /v:m /logger:%MSBuildLogger% +on_failure: if exist "tests\test_cmake_build" type tests\test_cmake_build\*.log* diff --git a/third_party/pybind11/.clang-format b/third_party/pybind11/.clang-format new file mode 100644 index 0000000000..b477a16037 --- /dev/null +++ b/third_party/pybind11/.clang-format @@ -0,0 +1,38 @@ +--- +# See all possible options and defaults with: +# clang-format --style=llvm --dump-config +BasedOnStyle: LLVM +AccessModifierOffset: -4 +AllowShortLambdasOnASingleLine: true +AlwaysBreakTemplateDeclarations: Yes +BinPackArguments: false +BinPackParameters: false +BreakBeforeBinaryOperators: All +BreakConstructorInitializers: BeforeColon +ColumnLimit: 99 +CommentPragmas: 'NOLINT:.*|^ IWYU pragma:' +IncludeBlocks: Regroup +IndentCaseLabels: true +IndentPPDirectives: AfterHash +IndentWidth: 4 +Language: Cpp +SpaceAfterCStyleCast: true +Standard: Cpp11 +StatementMacros: ['PyObject_HEAD'] +TabWidth: 4 +IncludeCategories: + - Regex: '' + Priority: 4 + - Regex: '.*' + Priority: 5 +... 
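pybind11's vendored `.clang-format` above is consumed through `clang-format -style=file`, which walks up the directory tree to the nearest config file. As a minimal sketch of wiring that into a pre-commit style check from Python (the helper below is illustrative, not part of this diff; it assumes clang-format 10+ for `--dry-run`/`--Werror`):

```python
import subprocess

def staged_cpp_files():
    # --diff-filter=d skips deleted files; keep only C++ sources/headers.
    out = subprocess.check_output(
        ["git", "diff", "--cached", "--name-only", "--diff-filter=d"],
        text=True)
    return [f for f in out.splitlines()
            if f.endswith((".cc", ".cpp", ".h", ".hpp"))]

for path in staged_cpp_files():
    # -style=file picks up the nearest .clang-format; --dry-run with
    # --Werror fails on violations without rewriting the file.
    subprocess.check_call(
        ["clang-format", "-style=file", "--dry-run", "--Werror", path])
```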
diff --git a/third_party/pybind11/.clang-tidy b/third_party/pybind11/.clang-tidy new file mode 100644 index 0000000000..e82443c4ca --- /dev/null +++ b/third_party/pybind11/.clang-tidy @@ -0,0 +1,73 @@ +FormatStyle: file + +Checks: ' +*bugprone*, +clang-analyzer-optin.performance.Padding, +clang-analyzer-optin.cplusplus.VirtualCall, +cppcoreguidelines-init-variables, +cppcoreguidelines-prefer-member-initializer, +cppcoreguidelines-pro-type-static-cast-downcast, +cppcoreguidelines-slicing, +google-explicit-constructor, +llvm-namespace-comment, +misc-misplaced-const, +misc-non-copyable-objects, +misc-static-assert, +misc-throw-by-value-catch-by-reference, +misc-uniqueptr-reset-release, +misc-unused-parameters, +modernize-avoid-bind, +modernize-make-shared, +modernize-redundant-void-arg, +modernize-replace-auto-ptr, +modernize-replace-disallow-copy-and-assign-macro, +modernize-replace-random-shuffle, +modernize-shrink-to-fit, +modernize-use-auto, +modernize-use-bool-literals, +modernize-use-default-member-init, +modernize-use-equals-default, +modernize-use-equals-delete, +modernize-use-emplace, +modernize-use-noexcept, +modernize-use-nullptr, +modernize-use-override, +modernize-use-using, +*performance*, +readability-avoid-const-params-in-decls, +readability-braces-around-statements, +readability-const-return-type, +readability-container-size-empty, +readability-delete-null-pointer, +readability-else-after-return, +readability-implicit-bool-conversion, +readability-inconsistent-declaration-parameter-name, +readability-make-member-function-const, +readability-misplaced-array-index, +readability-non-const-parameter, +readability-qualified-auto, +readability-redundant-function-ptr-dereference, +readability-redundant-smartptr-get, +readability-redundant-string-cstr, +readability-simplify-subscript-expr, +readability-static-accessed-through-instance, +readability-static-definition-in-anonymous-namespace, +readability-string-compare, +readability-suspicious-call-argument, +readability-uniqueptr-delete-release, +-bugprone-exception-escape, +-bugprone-reserved-identifier, +-bugprone-unused-raii, +' + +CheckOptions: +- key: performance-for-range-copy.WarnOnAllAutoCopies + value: true +- key: performance-unnecessary-value-param.AllowedTypes + value: 'exception_ptr$;' +- key: readability-implicit-bool-conversion.AllowPointerConditions + value: true + +HeaderFilterRegex: 'pybind11/.*h' + +WarningsAsErrors: '*' diff --git a/third_party/pybind11/.cmake-format.yaml b/third_party/pybind11/.cmake-format.yaml new file mode 100644 index 0000000000..a2a69f3f89 --- /dev/null +++ b/third_party/pybind11/.cmake-format.yaml @@ -0,0 +1,73 @@ +parse: + additional_commands: + pybind11_add_module: + flags: + - THIN_LTO + - MODULE + - SHARED + - NO_EXTRAS + - EXCLUDE_FROM_ALL + - SYSTEM + +format: + line_width: 99 + tab_size: 2 + + # If an argument group contains more than this many sub-groups + # (parg or kwarg groups) then force it to a vertical layout. + max_subgroups_hwrap: 2 + + # If a positional argument group contains more than this many + # arguments, then force it to a vertical layout. 
+ max_pargs_hwrap: 6 + + # If a cmdline positional group consumes more than this many + # lines without nesting, then invalidate the layout (and nest) + max_rows_cmdline: 2 + separate_ctrl_name_with_space: false + separate_fn_name_with_space: false + dangle_parens: false + + # If the trailing parenthesis must be 'dangled' on its on + # 'line, then align it to this reference: `prefix`: the start' + # 'of the statement, `prefix-indent`: the start of the' + # 'statement, plus one indentation level, `child`: align to' + # the column of the arguments + dangle_align: prefix + # If the statement spelling length (including space and + # parenthesis) is smaller than this amount, then force reject + # nested layouts. + min_prefix_chars: 4 + + # If the statement spelling length (including space and + # parenthesis) is larger than the tab width by more than this + # amount, then force reject un-nested layouts. + max_prefix_chars: 10 + + # If a candidate layout is wrapped horizontally but it exceeds + # this many lines, then reject the layout. + max_lines_hwrap: 2 + + line_ending: unix + + # Format command names consistently as 'lower' or 'upper' case + command_case: canonical + + # Format keywords consistently as 'lower' or 'upper' case + # unchanged is valid too + keyword_case: 'upper' + + # A list of command names which should always be wrapped + always_wrap: [] + + # If true, the argument lists which are known to be sortable + # will be sorted lexicographically + enable_sort: true + + # If true, the parsers may infer whether or not an argument + # list is sortable (without annotation). + autosort: false + +# Causes a few issues - can be solved later, possibly. +markup: + enable_markup: false diff --git a/third_party/pybind11/.gitattributes b/third_party/pybind11/.gitattributes new file mode 100644 index 0000000000..d611e1496d --- /dev/null +++ b/third_party/pybind11/.gitattributes @@ -0,0 +1 @@ +docs/*.svg binary diff --git a/third_party/pybind11/.github/CODEOWNERS b/third_party/pybind11/.github/CODEOWNERS new file mode 100644 index 0000000000..4e2c66902e --- /dev/null +++ b/third_party/pybind11/.github/CODEOWNERS @@ -0,0 +1,9 @@ +*.cmake @henryiii +CMakeLists.txt @henryiii +*.yml @henryiii +*.yaml @henryiii +/tools/ @henryiii +/pybind11/ @henryiii +noxfile.py @henryiii +.clang-format @henryiii +.clang-tidy @henryiii diff --git a/third_party/pybind11/.github/CONTRIBUTING.md b/third_party/pybind11/.github/CONTRIBUTING.md new file mode 100644 index 0000000000..c8ec91ff7a --- /dev/null +++ b/third_party/pybind11/.github/CONTRIBUTING.md @@ -0,0 +1,386 @@ +Thank you for your interest in this project! Please refer to the following +sections on how to contribute code and bug reports. + +### Reporting bugs + +Before submitting a question or bug report, please take a moment of your time +and ensure that your issue isn't already discussed in the project documentation +provided at [pybind11.readthedocs.org][] or in the [issue tracker][]. You can +also check [gitter][] to see if it came up before. + +Assuming that you have identified a previously unknown problem or an important +question, it's essential that you submit a self-contained and minimal piece of +code that reproduces the problem. In other words: no external dependencies, +isolate the function(s) that cause breakage, submit matched and complete C++ +and Python snippets that can be easily compiled and run in isolation; or +ideally make a small PR with a failing test case that can be used as a starting +point. 
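To make "matched and complete C++ and Python snippets" concrete: a good report pairs a one-function pybind11 module with a tiny driver such as the sketch below, where the module name `repro` and the function `roundtrip` are hypothetical placeholders for the code under test.

```python
# Python half of a matched repro. The C++ half would be a single
# PYBIND11_MODULE(repro, m) exposing just the function being reported.
import repro  # hypothetical extension module built from the C++ snippet

def test_roundtrip():
    # Expected: the value survives the C++ round trip unchanged.
    assert repro.roundtrip(42) == 42

if __name__ == "__main__":
    test_roundtrip()
```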
+ +## Pull requests + +Contributions are submitted, reviewed, and accepted using GitHub pull requests. +Please refer to [this article][using pull requests] for details and adhere to +the following rules to make the process as smooth as possible: + +* Make a new branch for every feature you're working on. +* Make small and clean pull requests that are easy to review but make sure they + do add value by themselves. +* Add tests for any new functionality and run the test suite (`cmake --build + build --target pytest`) to ensure that no existing features break. +* Please run [`pre-commit`][pre-commit] to check your code matches the + project style. (Note that `gawk` is required.) Use `pre-commit run + --all-files` before committing (or use installed-mode, check pre-commit docs) + to verify your code passes before pushing to save time. +* This project has a strong focus on providing general solutions using a + minimal amount of code, thus small pull requests are greatly preferred. + +### Licensing of contributions + +pybind11 is provided under a BSD-style license that can be found in the +``LICENSE`` file. By using, distributing, or contributing to this project, you +agree to the terms and conditions of this license. + +You are under no obligation whatsoever to provide any bug fixes, patches, or +upgrades to the features, functionality or performance of the source code +("Enhancements") to anyone; however, if you choose to make your Enhancements +available either publicly, or directly to the author of this software, without +imposing a separate written license agreement for such Enhancements, then you +hereby grant the following license: a non-exclusive, royalty-free perpetual +license to install, use, modify, prepare derivative works, incorporate into +other computer software, distribute, and sublicense such enhancements or +derivative works thereof, in binary and source code form. + + +## Development of pybind11 + +### Quick setup + +To setup a quick development environment, use [`nox`](https://nox.thea.codes). +This will allow you to do some common tasks with minimal setup effort, but will +take more time to run and be less flexible than a full development environment. +If you use [`pipx run nox`](https://pipx.pypa.io), you don't even need to +install `nox`. Examples: + +```bash +# List all available sessions +nox -l + +# Run linters +nox -s lint + +# Run tests on Python 3.9 +nox -s tests-3.9 + +# Build and preview docs +nox -s docs -- serve + +# Build SDists and wheels +nox -s build +``` + +### Full setup + +To setup an ideal development environment, run the following commands on a +system with CMake 3.14+: + +```bash +python3 -m venv venv +source venv/bin/activate +pip install -r tests/requirements.txt +cmake -S . -B build -DDOWNLOAD_CATCH=ON -DDOWNLOAD_EIGEN=ON +cmake --build build -j4 +``` + +Tips: + +* You can use `virtualenv` (faster, from PyPI) instead of `venv`. +* You can select any name for your environment folder; if it contains "env" it + will be ignored by git. +* If you don't have CMake 3.14+, just add "cmake" to the pip install command. +* You can use `-DPYBIND11_FINDPYTHON=ON` to use FindPython on CMake 3.12+ +* In classic mode, you may need to set `-DPYTHON_EXECUTABLE=/path/to/python`. + FindPython uses `-DPython_ROOT_DIR=/path/to` or + `-DPython_EXECUTABLE=/path/to/python`. + +### Configuration options + +In CMake, configuration options are given with "-D". 
Options are stored in the
+build directory, in the `CMakeCache.txt` file, so they are remembered for each
+build directory. Two selections are special - the generator, given with `-G`,
+and the compiler, which is selected based on environment variables `CXX` and
+similar, or `-DCMAKE_CXX_COMPILER=`. Unlike the others, these cannot be changed
+after the initial run.
+
+The valid options are:
+
+* `-DCMAKE_BUILD_TYPE`: Release, Debug, MinSizeRel, RelWithDebInfo
+* `-DPYBIND11_FINDPYTHON=ON`: Use CMake 3.12+'s FindPython instead of the
+  classic, deprecated, custom FindPythonLibs
+* `-DPYBIND11_NOPYTHON=ON`: Disable all Python searching (disables tests)
+* `-DBUILD_TESTING=ON`: Enable the tests
+* `-DDOWNLOAD_CATCH=ON`: Download catch to build the C++ tests
+* `-DDOWNLOAD_EIGEN=ON`: Download Eigen for the NumPy tests
+* `-DPYBIND11_INSTALL=ON/OFF`: Enable the install target (on by default for the
+  master project)
+* `-DUSE_PYTHON_INSTALL_DIR=ON`: Try to install into the python dir
+
+A few standard CMake tricks:
+
+* Use `cmake --build build -v` to see the commands used to build the files.
+* Use `cmake build -LH` to list the CMake options with help.
+* Use `ccmake` if available to see a curses (terminal) gui, or `cmake-gui` for
+  a completely graphical interface (not present in the PyPI package).
+* Use `cmake --build build -j12` to build with 12 cores (for example).
+* Use `-G` and the name of a generator to use something different. `cmake
+  --help` lists the generators available.
+  - On Unix, setting `CMAKE_GENERATOR=Ninja` in your environment will give
+    you automatic multithreading on all your CMake projects!
+* Open the `CMakeLists.txt` with QtCreator to generate for that IDE.
+* You can use `-DCMAKE_EXPORT_COMPILE_COMMANDS=ON` to generate the `.json` file
+  that some tools expect.
+ + +To run the tests, you can "build" the check target: + +```bash +cmake --build build --target check +``` + +`--target` can be spelled `-t` in CMake 3.15+. You can also run individual +tests with these targets: + +* `pytest`: Python tests only, using the +[pytest](https://docs.pytest.org/en/stable/) framework +* `cpptest`: C++ tests only +* `test_cmake_build`: Install / subdirectory tests + +If you want to build just a subset of tests, use +`-DPYBIND11_TEST_OVERRIDE="test_callbacks;test_pickling"`. If this is +empty, all tests will be built. Tests are specified without an extension if they need both a .py and +.cpp file. + +You may also pass flags to the `pytest` target by editing `tests/pytest.ini` or +by using the `PYTEST_ADDOPTS` environment variable +(see [`pytest` docs](https://docs.pytest.org/en/2.7.3/customize.html#adding-default-options)). As an example: + +```bash +env PYTEST_ADDOPTS="--capture=no --exitfirst" \ + cmake --build build --target pytest +# Or using abbreviated flags +env PYTEST_ADDOPTS="-s -x" cmake --build build --target pytest +``` + +### Formatting + +All formatting is handled by pre-commit. + +Install with brew (macOS) or pip (any OS): + +```bash +# Any OS +python3 -m pip install pre-commit + +# OR macOS with homebrew: +brew install pre-commit +``` + +Then, you can run it on the items you've added to your staging area, or all +files: + +```bash +pre-commit run +# OR +pre-commit run --all-files +``` + +And, if you want to always use it, you can install it as a git hook (hence the +name, pre-commit): + +```bash +pre-commit install +``` + +### Clang-Format + +As of v2.6.2, pybind11 ships with a [`clang-format`][clang-format] +configuration file at the top level of the repo (the filename is +`.clang-format`). Currently, formatting is NOT applied automatically, but +manually using `clang-format` for newly developed files is highly encouraged. +To check if a file needs formatting: + +```bash +clang-format -style=file --dry-run some.cpp +``` + +The output will show things to be fixed, if any. To actually format the file: + +```bash +clang-format -style=file -i some.cpp +``` + +Note that the `-style-file` option searches the parent directories for the +`.clang-format` file, i.e. the commands above can be run in any subdirectory +of the pybind11 repo. + +### Clang-Tidy + +[`clang-tidy`][clang-tidy] performs deeper static code analyses and is +more complex to run, compared to `clang-format`, but support for `clang-tidy` +is built into the pybind11 CMake configuration. To run `clang-tidy`, the +following recipe should work. Run the `docker` command from the top-level +directory inside your pybind11 git clone. Files will be modified in place, +so you can use git to monitor the changes. + +```bash +docker run --rm -v $PWD:/mounted_pybind11 -it silkeh/clang:12 +apt-get update && apt-get install -y python3-dev python3-pytest +cmake -S /mounted_pybind11/ -B build -DCMAKE_CXX_CLANG_TIDY="$(which clang-tidy);-fix" -DDOWNLOAD_EIGEN=ON -DDOWNLOAD_CATCH=ON -DCMAKE_CXX_STANDARD=17 +cmake --build build -j 2 -- --keep-going +``` + +### Include what you use + +To run include what you use, install (`brew install include-what-you-use` on +macOS), then run: + +```bash +cmake -S . -B build-iwyu -DCMAKE_CXX_INCLUDE_WHAT_YOU_USE=$(which include-what-you-use) +cmake --build build +``` + +The report is sent to stderr; you can pipe it into a file if you wish. 
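As a rough illustration of driving the targets above from a script (a hypothetical helper, shown only to demonstrate the targets and `PYTEST_ADDOPTS`; it assumes an already configured `build/` directory):

```python
import os
import subprocess

# Same abbreviated flags as above: disable capture, stop on first failure.
env = dict(os.environ, PYTEST_ADDOPTS="-s -x")

# Run the Python test suite, then the C++ one (`--target` is `-t` on 3.15+).
subprocess.check_call(["cmake", "--build", "build", "--target", "pytest"],
                      env=env)
subprocess.check_call(["cmake", "--build", "build", "--target", "cpptest"])
```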
+ +### Build recipes + +This builds with the Intel compiler (assuming it is in your path, along with a +recent CMake and Python): + +```bash +python3 -m venv venv +. venv/bin/activate +pip install pytest +cmake -S . -B build-intel -DCMAKE_CXX_COMPILER=$(which icpc) -DDOWNLOAD_CATCH=ON -DDOWNLOAD_EIGEN=ON -DPYBIND11_WERROR=ON +``` + +This will test the PGI compilers: + +```bash +docker run --rm -it -v $PWD:/pybind11 nvcr.io/hpc/pgi-compilers:ce +apt-get update && apt-get install -y python3-dev python3-pip python3-pytest +wget -qO- "https://cmake.org/files/v3.18/cmake-3.18.2-Linux-x86_64.tar.gz" | tar --strip-components=1 -xz -C /usr/local +cmake -S pybind11/ -B build +cmake --build build +``` + +### Explanation of the SDist/wheel building design + +> These details below are _only_ for packaging the Python sources from git. The +> SDists and wheels created do not have any extra requirements at all and are +> completely normal. + +The main objective of the packaging system is to create SDists (Python's source +distribution packages) and wheels (Python's binary distribution packages) that +include everything that is needed to work with pybind11, and which can be +installed without any additional dependencies. This is more complex than it +appears: in order to support CMake as a first class language even when using +the PyPI package, they must include the _generated_ CMake files (so as not to +require CMake when installing the `pybind11` package itself). They should also +provide the option to install to the "standard" location +(`/include/pybind11` and `/share/cmake/pybind11`) so they are +easy to find with CMake, but this can cause problems if you are not an +environment or using ``pyproject.toml`` requirements. This was solved by having +two packages; the "nice" pybind11 package that stores the includes and CMake +files inside the package, that you get access to via functions in the package, +and a `pybind11-global` package that can be included via `pybind11[global]` if +you want the more invasive but discoverable file locations. + +If you want to install or package the GitHub source, it is best to have Pip 10 +or newer on Windows, macOS, or Linux (manylinux1 compatible, includes most +distributions). You can then build the SDists, or run any procedure that makes +SDists internally, like making wheels or installing. + + +```bash +# Editable development install example +python3 -m pip install -e . +``` + +Since Pip itself does not have an `sdist` command (it does have `wheel` and +`install`), you may want to use the upcoming `build` package: + +```bash +python3 -m pip install build + +# Normal package +python3 -m build -s . + +# Global extra +PYBIND11_GLOBAL_SDIST=1 python3 -m build -s . +``` + +If you want to use the classic "direct" usage of `python setup.py`, you will +need CMake 3.15+ and either `make` or `ninja` preinstalled (possibly via `pip +install cmake ninja`), since directly running Python on `setup.py` cannot pick +up and install `pyproject.toml` requirements. As long as you have those two +things, though, everything works the way you would expect: + +```bash +# Normal package +python3 setup.py sdist + +# Global extra +PYBIND11_GLOBAL_SDIST=1 python3 setup.py sdist +``` + +A detailed explanation of the build procedure design for developers wanting to +work on or maintain the packaging system is as follows: + +#### 1. 
Building from the source directory + +When you invoke any `setup.py` command from the source directory, including +`pip wheel .` and `pip install .`, you will activate a full source build. This +is made of the following steps: + +1. If the tool is PEP 518 compliant, like Pip 10+, it will create a temporary + virtual environment and install the build requirements (mostly CMake) into + it. (if you are not on Windows, macOS, or a manylinux compliant system, you + can disable this with `--no-build-isolation` as long as you have CMake 3.15+ + installed) +2. The environment variable `PYBIND11_GLOBAL_SDIST` is checked - if it is set + and truthy, this will be make the accessory `pybind11-global` package, + instead of the normal `pybind11` package. This package is used for + installing the files directly to your environment root directory, using + `pybind11[global]`. +2. `setup.py` reads the version from `pybind11/_version.py` and verifies it + matches `includes/pybind11/detail/common.h`. +3. CMake is run with `-DCMAKE_INSTALL_PREIFX=pybind11`. Since the CMake install + procedure uses only relative paths and is identical on all platforms, these + files are valid as long as they stay in the correct relative position to the + includes. `pybind11/share/cmake/pybind11` has the CMake files, and + `pybind11/include` has the includes. The build directory is discarded. +4. Simpler files are placed in the SDist: `tools/setup_*.py.in`, + `tools/pyproject.toml` (`main` or `global`) +5. The package is created by running the setup function in the + `tools/setup_*.py`. `setup_main.py` fills in Python packages, and + `setup_global.py` fills in only the data/header slots. +6. A context manager cleans up the temporary CMake install directory (even if + an error is thrown). + +### 2. Building from SDist + +Since the SDist has the rendered template files in `tools` along with the +includes and CMake files in the correct locations, the builds are completely +trivial and simple. No extra requirements are required. You can even use Pip 9 +if you really want to. + + +[pre-commit]: https://pre-commit.com +[clang-format]: https://clang.llvm.org/docs/ClangFormat.html +[clang-tidy]: https://clang.llvm.org/extra/clang-tidy/ +[pybind11.readthedocs.org]: http://pybind11.readthedocs.org/en/latest +[issue tracker]: https://github.com/pybind/pybind11/issues +[gitter]: https://gitter.im/pybind/Lobby +[using pull requests]: https://help.github.com/articles/using-pull-requests diff --git a/third_party/pybind11/.github/ISSUE_TEMPLATE/bug-report.yml b/third_party/pybind11/.github/ISSUE_TEMPLATE/bug-report.yml new file mode 100644 index 0000000000..bd6a9a8e22 --- /dev/null +++ b/third_party/pybind11/.github/ISSUE_TEMPLATE/bug-report.yml @@ -0,0 +1,45 @@ +name: Bug Report +description: File an issue about a bug +title: "[BUG]: " +labels: [triage] +body: + - type: markdown + attributes: + value: | + Maintainers will only make a best effort to triage PRs. Please do your best to make the issue as easy to act on as possible, and only open if clearly a problem with pybind11 (ask first if unsure). + - type: checkboxes + id: steps + attributes: + label: Required prerequisites + description: Make sure you've completed the following steps before submitting your issue -- thank you! + options: + - label: Make sure you've read the [documentation](https://pybind11.readthedocs.io). Your issue may be addressed there. 
+ required: true + - label: Search the [issue tracker](https://github.com/pybind/pybind11/issues) and [Discussions](https:/pybind/pybind11/discussions) to verify that this hasn't already been reported. +1 or comment there if it has. + required: true + - label: Consider asking first in the [Gitter chat room](https://gitter.im/pybind/Lobby) or in a [Discussion](https:/pybind/pybind11/discussions/new). + required: false + + - type: textarea + id: description + attributes: + label: Problem description + placeholder: >- + Provide a short description, state the expected behavior and what + actually happens. Include relevant information like what version of + pybind11 you are using, what system you are on, and any useful commands + / output. + validations: + required: true + + - type: textarea + id: code + attributes: + label: Reproducible example code + placeholder: >- + The code should be minimal, have no external dependencies, isolate the + function(s) that cause breakage. Submit matched and complete C++ and + Python snippets that can be easily compiled and run to diagnose the + issue. If possible, make a PR with a new, failing test to give us a + starting point to work on! + render: text diff --git a/third_party/pybind11/.github/ISSUE_TEMPLATE/config.yml b/third_party/pybind11/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 0000000000..27f9a80441 --- /dev/null +++ b/third_party/pybind11/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,8 @@ +blank_issues_enabled: false +contact_links: + - name: Ask a question + url: https://github.com/pybind/pybind11/discussions/new + about: Please ask and answer questions here, or propose new ideas. + - name: Gitter room + url: https://gitter.im/pybind/Lobby + about: A room for discussing pybind11 with an active community diff --git a/third_party/pybind11/.github/dependabot.yml b/third_party/pybind11/.github/dependabot.yml new file mode 100644 index 0000000000..d8d1eed8c3 --- /dev/null +++ b/third_party/pybind11/.github/dependabot.yml @@ -0,0 +1,11 @@ +version: 2 +updates: + # Maintain dependencies for GitHub Actions + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "daily" + ignore: + # Official actions have moving tags like v1 + - dependency-name: "actions/*" + update-types: ["version-update:semver-minor", "version-update:semver-patch"] diff --git a/third_party/pybind11/.github/labeler.yml b/third_party/pybind11/.github/labeler.yml new file mode 100644 index 0000000000..abb0d05aaa --- /dev/null +++ b/third_party/pybind11/.github/labeler.yml @@ -0,0 +1,8 @@ +docs: +- any: + - 'docs/**/*.rst' + - '!docs/changelog.rst' + - '!docs/upgrade.rst' + +ci: +- '.github/workflows/*.yml' diff --git a/third_party/pybind11/.github/labeler_merged.yml b/third_party/pybind11/.github/labeler_merged.yml new file mode 100644 index 0000000000..2374ad42e4 --- /dev/null +++ b/third_party/pybind11/.github/labeler_merged.yml @@ -0,0 +1,3 @@ +needs changelog: +- all: + - '!docs/changelog.rst' diff --git a/third_party/pybind11/.github/matchers/pylint.json b/third_party/pybind11/.github/matchers/pylint.json new file mode 100644 index 0000000000..e3a6bd16b0 --- /dev/null +++ b/third_party/pybind11/.github/matchers/pylint.json @@ -0,0 +1,32 @@ +{ + "problemMatcher": [ + { + "severity": "warning", + "pattern": [ + { + "regexp": "^([^:]+):(\\d+):(\\d+): ([A-DF-Z]\\d+): \\033\\[[\\d;]+m([^\\033]+).*$", + "file": 1, + "line": 2, + "column": 3, + "code": 4, + "message": 5 + } + ], + "owner": "pylint-warning" + }, + { + "severity": "error", + "pattern": 
[ + { + "regexp": "^([^:]+):(\\d+):(\\d+): (E\\d+): \\033\\[[\\d;]+m([^\\033]+).*$", + "file": 1, + "line": 2, + "column": 3, + "code": 4, + "message": 5 + } + ], + "owner": "pylint-error" + } + ] +} diff --git a/third_party/pybind11/.github/pull_request_template.md b/third_party/pybind11/.github/pull_request_template.md new file mode 100644 index 0000000000..54b7f5100d --- /dev/null +++ b/third_party/pybind11/.github/pull_request_template.md @@ -0,0 +1,19 @@ + +## Description + + + + +## Suggested changelog entry: + + + +```rst + +``` + + diff --git a/third_party/pybind11/.github/workflows/ci.yml b/third_party/pybind11/.github/workflows/ci.yml new file mode 100644 index 0000000000..8b537e6e1e --- /dev/null +++ b/third_party/pybind11/.github/workflows/ci.yml @@ -0,0 +1,945 @@ +name: CI + +on: + workflow_dispatch: + pull_request: + push: + branches: + - master + - stable + - v* + +concurrency: + group: test-${{ github.ref }} + cancel-in-progress: true + +env: + PIP_ONLY_BINARY: numpy + FORCE_COLOR: 3 + PYTEST_TIMEOUT: 300 + +jobs: + # This is the "main" test suite, which tests a large number of different + # versions of default compilers and Python versions in GitHub Actions. + standard: + strategy: + fail-fast: false + matrix: + runs-on: [ubuntu-latest, windows-2022, macos-latest] + python: + - '3.6' + - '3.9' + - '3.10' + - 'pypy-3.7' + - 'pypy-3.8' + - 'pypy-3.9' + + # Items in here will either be added to the build matrix (if not + # present), or add new keys to an existing matrix element if all the + # existing keys match. + # + # We support an optional key: args, for cmake args + include: + # Just add a key + - runs-on: ubuntu-latest + python: '3.6' + args: > + -DPYBIND11_FINDPYTHON=ON + -DCMAKE_CXX_FLAGS="-D_=1" + - runs-on: ubuntu-latest + python: 'pypy-3.8' + args: > + -DPYBIND11_FINDPYTHON=ON + - runs-on: windows-2019 + python: '3.6' + args: > + -DPYBIND11_FINDPYTHON=ON + # Inject a couple Windows 2019 runs + - runs-on: windows-2019 + python: '3.9' + + name: "🐍 ${{ matrix.python }} • ${{ matrix.runs-on }} • x64 ${{ matrix.args }}" + runs-on: ${{ matrix.runs-on }} + + steps: + - uses: actions/checkout@v3 + + - name: Setup Python ${{ matrix.python }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python }} + + - name: Setup Boost (Linux) + # Can't use boost + define _ + if: runner.os == 'Linux' && matrix.python != '3.6' + run: sudo apt-get install libboost-dev + + - name: Setup Boost (macOS) + if: runner.os == 'macOS' + run: brew install boost + + - name: Update CMake + uses: jwlawson/actions-setup-cmake@v1.12 + + - name: Cache wheels + if: runner.os == 'macOS' + uses: actions/cache@v3 + with: + # This path is specific to macOS - we really only need it for PyPy NumPy wheels + # See https://github.com/actions/cache/blob/master/examples.md#python---pip + # for ways to do this more generally + path: ~/Library/Caches/pip + # Look to see if there is a cache hit for the corresponding requirements file + key: ${{ runner.os }}-pip-${{ matrix.python }}-x64-${{ hashFiles('tests/requirements.txt') }} + + - name: Prepare env + run: | + python -m pip install -r tests/requirements.txt + + - name: Setup annotations on Linux + if: runner.os == 'Linux' + run: python -m pip install pytest-github-actions-annotate-failures + + # First build - C++11 mode and inplace + - name: Configure C++11 ${{ matrix.args }} + run: > + cmake -S . -B . 
+ -DPYBIND11_WERROR=ON + -DDOWNLOAD_CATCH=ON + -DDOWNLOAD_EIGEN=ON + -DCMAKE_CXX_STANDARD=11 + ${{ matrix.args }} + + - name: Build C++11 + run: cmake --build . -j 2 + + - name: Python tests C++11 + run: cmake --build . --target pytest -j 2 + + - name: C++11 tests + # TODO: Figure out how to load the DLL on Python 3.8+ + if: "!(runner.os == 'Windows' && (matrix.python == 3.8 || matrix.python == 3.9 || matrix.python == '3.10' || matrix.python == '3.11-dev' || matrix.python == 'pypy-3.8'))" + run: cmake --build . --target cpptest -j 2 + + - name: Interface test C++11 + run: cmake --build . --target test_cmake_build + + - name: Clean directory + run: git clean -fdx + + # Second build - C++17 mode and in a build directory + - name: Configure C++17 + run: > + cmake -S . -B build2 + -DPYBIND11_WERROR=ON + -DDOWNLOAD_CATCH=ON + -DDOWNLOAD_EIGEN=ON + -DCMAKE_CXX_STANDARD=17 + ${{ matrix.args }} + + - name: Build + run: cmake --build build2 -j 2 + + - name: Python tests + run: cmake --build build2 --target pytest + + - name: C++ tests + # TODO: Figure out how to load the DLL on Python 3.8+ + if: "!(runner.os == 'Windows' && (matrix.python == 3.8 || matrix.python == 3.9 || matrix.python == '3.10' || matrix.python == '3.11-dev' || matrix.python == 'pypy-3.8'))" + run: cmake --build build2 --target cpptest + + # Third build - C++17 mode with unstable ABI + - name: Configure (unstable ABI) + run: > + cmake -S . -B build3 + -DPYBIND11_WERROR=ON + -DDOWNLOAD_CATCH=ON + -DDOWNLOAD_EIGEN=ON + -DCMAKE_CXX_STANDARD=17 + -DPYBIND11_INTERNALS_VERSION=10000000 + "-DPYBIND11_TEST_OVERRIDE=test_call_policies.cpp;test_gil_scoped.cpp;test_thread.cpp" + ${{ matrix.args }} + + - name: Build (unstable ABI) + run: cmake --build build3 -j 2 + + - name: Python tests (unstable ABI) + run: cmake --build build3 --target pytest + + - name: Interface test + run: cmake --build build2 --target test_cmake_build + + # This makes sure the setup_helpers module can build packages using + # setuptools + - name: Setuptools helpers test + run: pytest tests/extra_setuptools + if: "!(matrix.runs-on == 'windows-2022')" + + + deadsnakes: + strategy: + fail-fast: false + matrix: + include: + # TODO: Fails on 3.10, investigate + - python-version: "3.9" + python-debug: true + valgrind: true + # - python-version: "3.11-dev" + # python-debug: false + + name: "🐍 ${{ matrix.python-version }}${{ matrix.python-debug && '-dbg' || '' }} (deadsnakes)${{ matrix.valgrind && ' • Valgrind' || '' }} • x64" + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + + - name: Setup Python ${{ matrix.python-version }} (deadsnakes) + uses: deadsnakes/action@v2.1.1 + with: + python-version: ${{ matrix.python-version }} + debug: ${{ matrix.python-debug }} + + - name: Update CMake + uses: jwlawson/actions-setup-cmake@v1.12 + + - name: Valgrind cache + if: matrix.valgrind + uses: actions/cache@v3 + id: cache-valgrind + with: + path: valgrind + key: 3.16.1 # Valgrind version + + - name: Compile Valgrind + if: matrix.valgrind && steps.cache-valgrind.outputs.cache-hit != 'true' + run: | + VALGRIND_VERSION=3.16.1 + curl https://sourceware.org/pub/valgrind/valgrind-$VALGRIND_VERSION.tar.bz2 -o - | tar xj + mv valgrind-$VALGRIND_VERSION valgrind + cd valgrind + ./configure + make -j 2 > /dev/null + + - name: Install Valgrind + if: matrix.valgrind + working-directory: valgrind + run: | + sudo make install + sudo apt-get update + sudo apt-get install libc6-dbg # Needed by Valgrind + + - name: Prepare env + run: | + python -m pip install -r tests/requirements.txt 
+ + - name: Configure + env: + SETUPTOOLS_USE_DISTUTILS: stdlib + run: > + cmake -S . -B build + -DCMAKE_BUILD_TYPE=Debug + -DPYBIND11_WERROR=ON + -DDOWNLOAD_CATCH=ON + -DDOWNLOAD_EIGEN=ON + -DCMAKE_CXX_STANDARD=17 + + - name: Build + run: cmake --build build -j 2 + + - name: Python tests + run: cmake --build build --target pytest + + - name: C++ tests + run: cmake --build build --target cpptest + + - name: Run Valgrind on Python tests + if: matrix.valgrind + run: cmake --build build --target memcheck + + + # Testing on clang using the excellent silkeh clang docker images + clang: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + clang: + - 3.6 + - 3.7 + - 3.9 + - 7 + - 9 + - dev + std: + - 11 + include: + - clang: 5 + std: 14 + - clang: 10 + std: 20 + - clang: 10 + std: 17 + + name: "🐍 3 • Clang ${{ matrix.clang }} • C++${{ matrix.std }} • x64" + container: "silkeh/clang:${{ matrix.clang }}" + + steps: + - uses: actions/checkout@v3 + + - name: Add wget and python3 + run: apt-get update && apt-get install -y python3-dev python3-numpy python3-pytest libeigen3-dev + + - name: Configure + shell: bash + run: > + cmake -S . -B build + -DPYBIND11_WERROR=ON + -DDOWNLOAD_CATCH=ON + -DCMAKE_CXX_STANDARD=${{ matrix.std }} + -DPYTHON_EXECUTABLE=$(python3 -c "import sys; print(sys.executable)") + + - name: Build + run: cmake --build build -j 2 + + - name: Python tests + run: cmake --build build --target pytest + + - name: C++ tests + run: cmake --build build --target cpptest + + - name: Interface test + run: cmake --build build --target test_cmake_build + + + # Testing NVCC; forces sources to behave like .cu files + cuda: + runs-on: ubuntu-latest + name: "🐍 3.8 • CUDA 11.2 • Ubuntu 20.04" + container: nvidia/cuda:11.2.2-devel-ubuntu20.04 + + steps: + - uses: actions/checkout@v3 + + # tzdata will try to ask for the timezone, so set the DEBIAN_FRONTEND + - name: Install 🐍 3 + run: apt-get update && DEBIAN_FRONTEND="noninteractive" apt-get install -y cmake git python3-dev python3-pytest python3-numpy + + - name: Configure + run: cmake -S . -B build -DPYBIND11_CUDA_TESTS=ON -DPYBIND11_WERROR=ON -DDOWNLOAD_CATCH=ON + + - name: Build + run: cmake --build build -j2 --verbose + + - name: Python tests + run: cmake --build build --target pytest + + +# TODO: Internal compiler error - report to NVidia +# # Testing CentOS 8 + PGI compilers +# centos-nvhpc8: +# runs-on: ubuntu-latest +# name: "🐍 3 • CentOS8 / PGI 20.11 • x64" +# container: centos:8 +# +# steps: +# - uses: actions/checkout@v3 +# +# - name: Add Python 3 and a few requirements +# run: yum update -y && yum install -y git python3-devel python3-numpy python3-pytest make environment-modules +# +# - name: Install CMake with pip +# run: | +# python3 -m pip install --upgrade pip +# python3 -m pip install cmake --prefer-binary +# +# - name: Install NVidia HPC SDK +# run: > +# yum -y install +# https://developer.download.nvidia.com/hpc-sdk/20.11/nvhpc-20-11-20.11-1.x86_64.rpm +# https://developer.download.nvidia.com/hpc-sdk/20.11/nvhpc-2020-20.11-1.x86_64.rpm +# +# - name: Configure +# shell: bash +# run: | +# source /etc/profile.d/modules.sh +# module load /opt/nvidia/hpc_sdk/modulefiles/nvhpc/20.11 +# cmake -S . 
-B build -DDOWNLOAD_CATCH=ON -DCMAKE_CXX_STANDARD=14 -DPYTHON_EXECUTABLE=$(python3 -c "import sys; print(sys.executable)") +# +# - name: Build +# run: cmake --build build -j 2 --verbose +# +# - name: Python tests +# run: cmake --build build --target pytest +# +# - name: C++ tests +# run: cmake --build build --target cpptest +# +# - name: Interface test +# run: cmake --build build --target test_cmake_build + + + # Testing on CentOS 7 + PGI compilers, which seems to require more workarounds + centos-nvhpc7: + runs-on: ubuntu-latest + name: "🐍 3 • CentOS7 / PGI 22.3 • x64" + container: centos:7 + + steps: + - uses: actions/checkout@v3 + + - name: Add Python 3 and a few requirements + run: yum update -y && yum install -y epel-release && yum install -y git python3-devel make environment-modules cmake3 yum-utils + + - name: Install NVidia HPC SDK + run: yum-config-manager --add-repo https://developer.download.nvidia.com/hpc-sdk/rhel/nvhpc.repo && yum -y install nvhpc-22.3 + + # On CentOS 7, we have to filter a few tests (compiler internal error) + # and allow deeper template recursion (not needed on CentOS 8 with a newer + # standard library). On some systems, you many need further workarounds: + # https://github.com/pybind/pybind11/pull/2475 + - name: Configure + shell: bash + run: | + source /etc/profile.d/modules.sh + module load /opt/nvidia/hpc_sdk/modulefiles/nvhpc/22.3 + cmake3 -S . -B build -DDOWNLOAD_CATCH=ON \ + -DCMAKE_CXX_STANDARD=11 \ + -DPYTHON_EXECUTABLE=$(python3 -c "import sys; print(sys.executable)") \ + -DCMAKE_CXX_FLAGS="-Wc,--pending_instantiations=0" \ + -DPYBIND11_TEST_FILTER="test_smart_ptr.cpp;test_virtual_functions.cpp" + + # Building before installing Pip should produce a warning but not an error + - name: Build + run: cmake3 --build build -j 2 --verbose + + - name: Install CMake with pip + run: | + python3 -m pip install --upgrade pip + python3 -m pip install pytest + + - name: Python tests + run: cmake3 --build build --target pytest + + - name: C++ tests + run: cmake3 --build build --target cpptest + + - name: Interface test + run: cmake3 --build build --target test_cmake_build + + + # Testing on GCC using the GCC docker images (only recent images supported) + gcc: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + gcc: + - 7 + - latest + std: + - 11 + include: + - gcc: 10 + std: 20 + + name: "🐍 3 • GCC ${{ matrix.gcc }} • C++${{ matrix.std }}• x64" + container: "gcc:${{ matrix.gcc }}" + + steps: + - uses: actions/checkout@v3 + + - name: Add Python 3 + run: apt-get update; apt-get install -y python3-dev python3-numpy python3-pytest python3-pip libeigen3-dev + + - name: Update pip + run: python3 -m pip install --upgrade pip + + - name: Update CMake + uses: jwlawson/actions-setup-cmake@v1.12 + + - name: Configure + shell: bash + run: > + cmake -S . 
-B build + -DPYBIND11_WERROR=ON + -DDOWNLOAD_CATCH=ON + -DCMAKE_CXX_STANDARD=${{ matrix.std }} + -DPYTHON_EXECUTABLE=$(python3 -c "import sys; print(sys.executable)") + + - name: Build + run: cmake --build build -j 2 + + - name: Python tests + run: cmake --build build --target pytest + + - name: C++ tests + run: cmake --build build --target cpptest + + - name: Interface test + run: cmake --build build --target test_cmake_build + + + # Testing on ICC using the oneAPI apt repo + icc: + runs-on: ubuntu-20.04 + strategy: + fail-fast: false + + name: "🐍 3 • ICC latest • x64" + + steps: + - uses: actions/checkout@v3 + + - name: Add apt repo + run: | + sudo apt-get update + sudo apt-get install -y wget build-essential pkg-config cmake ca-certificates gnupg + wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB + sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB + echo "deb https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list + + - name: Add ICC & Python 3 + run: sudo apt-get update; sudo apt-get install -y intel-oneapi-compiler-dpcpp-cpp-and-cpp-classic cmake python3-dev python3-numpy python3-pytest python3-pip + + - name: Update pip + run: | + set +e; source /opt/intel/oneapi/setvars.sh; set -e + python3 -m pip install --upgrade pip + + - name: Install dependencies + run: | + set +e; source /opt/intel/oneapi/setvars.sh; set -e + python3 -m pip install -r tests/requirements.txt + + - name: Configure C++11 + run: | + set +e; source /opt/intel/oneapi/setvars.sh; set -e + cmake -S . -B build-11 \ + -DPYBIND11_WERROR=ON \ + -DDOWNLOAD_CATCH=ON \ + -DDOWNLOAD_EIGEN=OFF \ + -DCMAKE_CXX_STANDARD=11 \ + -DCMAKE_CXX_COMPILER=$(which icpc) \ + -DPYTHON_EXECUTABLE=$(python3 -c "import sys; print(sys.executable)") + + - name: Build C++11 + run: | + set +e; source /opt/intel/oneapi/setvars.sh; set -e + cmake --build build-11 -j 2 -v + + - name: Python tests C++11 + run: | + set +e; source /opt/intel/oneapi/setvars.sh; set -e + sudo service apport stop + cmake --build build-11 --target check + + - name: C++ tests C++11 + run: | + set +e; source /opt/intel/oneapi/setvars.sh; set -e + cmake --build build-11 --target cpptest + + - name: Interface test C++11 + run: | + set +e; source /opt/intel/oneapi/setvars.sh; set -e + cmake --build build-11 --target test_cmake_build + + - name: Configure C++17 + run: | + set +e; source /opt/intel/oneapi/setvars.sh; set -e + cmake -S . -B build-17 \ + -DPYBIND11_WERROR=ON \ + -DDOWNLOAD_CATCH=ON \ + -DDOWNLOAD_EIGEN=OFF \ + -DCMAKE_CXX_STANDARD=17 \ + -DCMAKE_CXX_COMPILER=$(which icpc) \ + -DPYTHON_EXECUTABLE=$(python3 -c "import sys; print(sys.executable)") + + - name: Build C++17 + run: | + set +e; source /opt/intel/oneapi/setvars.sh; set -e + cmake --build build-17 -j 2 -v + + - name: Python tests C++17 + run: | + set +e; source /opt/intel/oneapi/setvars.sh; set -e + sudo service apport stop + cmake --build build-17 --target check + + - name: C++ tests C++17 + run: | + set +e; source /opt/intel/oneapi/setvars.sh; set -e + cmake --build build-17 --target cpptest + + - name: Interface test C++17 + run: | + set +e; source /opt/intel/oneapi/setvars.sh; set -e + cmake --build build-17 --target test_cmake_build + + + # Testing on CentOS (manylinux uses a centos base, and this is an easy way + # to get GCC 4.8, which is the manylinux1 compiler). 
+ centos: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + centos: + - centos7 # GCC 4.8 + - stream8 + + name: "🐍 3 • CentOS ${{ matrix.centos }} • x64" + container: "quay.io/centos/centos:${{ matrix.centos }}" + + steps: + - uses: actions/checkout@v3 + + - name: Add Python 3 + run: yum update -y && yum install -y python3-devel gcc-c++ make git + + - name: Update pip + run: python3 -m pip install --upgrade pip + + - name: Install dependencies + run: | + python3 -m pip install cmake -r tests/requirements.txt + + - name: Configure + shell: bash + run: > + cmake -S . -B build + -DCMAKE_BUILD_TYPE=MinSizeRel + -DPYBIND11_WERROR=ON + -DDOWNLOAD_CATCH=ON + -DDOWNLOAD_EIGEN=ON + -DCMAKE_CXX_STANDARD=11 + -DPYTHON_EXECUTABLE=$(python3 -c "import sys; print(sys.executable)") + + - name: Build + run: cmake --build build -j 2 + + - name: Python tests + run: cmake --build build --target pytest + + - name: C++ tests + run: cmake --build build --target cpptest + + - name: Interface test + run: cmake --build build --target test_cmake_build + + + # This tests an "install" with the CMake tools + install-classic: + name: "🐍 3.7 • Debian • x86 • Install" + runs-on: ubuntu-latest + container: i386/debian:buster + + steps: + - uses: actions/checkout@v1 # Required to run inside docker + + - name: Install requirements + run: | + apt-get update + apt-get install -y git make cmake g++ libeigen3-dev python3-dev python3-pip + pip3 install "pytest==6.*" + + - name: Configure for install + run: > + cmake . + -DPYBIND11_INSTALL=1 -DPYBIND11_TEST=0 + -DPYTHON_EXECUTABLE=$(python3 -c "import sys; print(sys.executable)") + + - name: Make and install + run: make install + + - name: Copy tests to new directory + run: cp -a tests /pybind11-tests + + - name: Make a new test directory + run: mkdir /build-tests + + - name: Configure tests + run: > + cmake ../pybind11-tests + -DDOWNLOAD_CATCH=ON + -DPYBIND11_WERROR=ON + -DPYTHON_EXECUTABLE=$(python3 -c "import sys; print(sys.executable)") + working-directory: /build-tests + + - name: Python tests + run: make pytest -j 2 + working-directory: /build-tests + + + # This verifies that the documentation is not horribly broken, and does a + # basic validation check on the SDist. 
+ doxygen: + name: "Documentation build test" + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + + - uses: actions/setup-python@v3 + + - name: Install Doxygen + run: sudo apt-get install -y doxygen librsvg2-bin # Changed to rsvg-convert in 20.04 + + - name: Build docs + run: pipx run nox -s docs + + - name: Make SDist + run: pipx run nox -s build -- --sdist + + - run: git status --ignored + + - name: Check local include dir + run: > + ls pybind11; + python3 -c "import pybind11, pathlib; assert (a := pybind11.get_include()) == (b := str(pathlib.Path('include').resolve())), f'{a} != {b}'" + + - name: Compare Dists (headers only) + working-directory: include + run: | + python3 -m pip install --user -U ../dist/*.tar.gz + installed=$(python3 -c "import pybind11; print(pybind11.get_include() + '/pybind11')") + diff -rq $installed ./pybind11 + + win32: + strategy: + fail-fast: false + matrix: + python: + - 3.6 + - 3.7 + - 3.8 + - 3.9 + + include: + - python: 3.9 + args: -DCMAKE_CXX_STANDARD=20 + - python: 3.8 + args: -DCMAKE_CXX_STANDARD=17 + + name: "🐍 ${{ matrix.python }} • MSVC 2019 • x86 ${{ matrix.args }}" + runs-on: windows-2019 + + steps: + - uses: actions/checkout@v3 + + - name: Setup Python ${{ matrix.python }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python }} + architecture: x86 + + - name: Update CMake + uses: jwlawson/actions-setup-cmake@v1.12 + + - name: Prepare MSVC + uses: ilammy/msvc-dev-cmd@v1.10.0 + with: + arch: x86 + + - name: Prepare env + run: | + python -m pip install -r tests/requirements.txt + + # First build - C++11 mode and inplace + - name: Configure ${{ matrix.args }} + run: > + cmake -S . -B build + -G "Visual Studio 16 2019" -A Win32 + -DPYBIND11_WERROR=ON + -DDOWNLOAD_CATCH=ON + -DDOWNLOAD_EIGEN=ON + ${{ matrix.args }} + - name: Build C++11 + run: cmake --build build -j 2 + + - name: Python tests + run: cmake --build build -t pytest + + win32-debug: + strategy: + fail-fast: false + matrix: + python: + - 3.8 + - 3.9 + + include: + - python: 3.9 + args: -DCMAKE_CXX_STANDARD=20 + - python: 3.8 + args: -DCMAKE_CXX_STANDARD=17 + + name: "🐍 ${{ matrix.python }} • MSVC 2019 (Debug) • x86 ${{ matrix.args }}" + runs-on: windows-2019 + + steps: + - uses: actions/checkout@v3 + + - name: Setup Python ${{ matrix.python }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python }} + architecture: x86 + + - name: Update CMake + uses: jwlawson/actions-setup-cmake@v1.12 + + - name: Prepare MSVC + uses: ilammy/msvc-dev-cmd@v1.10.0 + with: + arch: x86 + + - name: Prepare env + run: | + python -m pip install -r tests/requirements.txt + + # First build - C++11 mode and inplace + - name: Configure ${{ matrix.args }} + run: > + cmake -S . 
-B build + -G "Visual Studio 16 2019" -A Win32 + -DCMAKE_BUILD_TYPE=Debug + -DPYBIND11_WERROR=ON + -DDOWNLOAD_CATCH=ON + -DDOWNLOAD_EIGEN=ON + ${{ matrix.args }} + - name: Build C++11 + run: cmake --build build --config Debug -j 2 + + - name: Python tests + run: cmake --build build --config Debug -t pytest + + + windows-2022: + strategy: + fail-fast: false + matrix: + python: + - 3.9 + + name: "🐍 ${{ matrix.python }} • MSVC 2022 C++20 • x64" + runs-on: windows-2022 + + steps: + - uses: actions/checkout@v3 + + - name: Setup Python ${{ matrix.python }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python }} + + - name: Prepare env + run: | + python3 -m pip install -r tests/requirements.txt + + - name: Update CMake + uses: jwlawson/actions-setup-cmake@v1.12 + + - name: Configure C++20 + run: > + cmake -S . -B build + -DPYBIND11_WERROR=ON + -DDOWNLOAD_CATCH=ON + -DDOWNLOAD_EIGEN=ON + -DCMAKE_CXX_STANDARD=20 + + - name: Build C++20 + run: cmake --build build -j 2 + + - name: Python tests + run: cmake --build build --target pytest + + - name: C++20 tests + run: cmake --build build --target cpptest -j 2 + + - name: Interface test C++20 + run: cmake --build build --target test_cmake_build + + mingw: + name: "🐍 3 • windows-latest • ${{ matrix.sys }}" + runs-on: windows-latest + defaults: + run: + shell: msys2 {0} + strategy: + fail-fast: false + matrix: + include: + - { sys: mingw64, env: x86_64 } + - { sys: mingw32, env: i686 } + steps: + - uses: msys2/setup-msys2@v2 + with: + msystem: ${{matrix.sys}} + install: >- + git + mingw-w64-${{matrix.env}}-gcc + mingw-w64-${{matrix.env}}-python-pip + mingw-w64-${{matrix.env}}-python-numpy + mingw-w64-${{matrix.env}}-python-scipy + mingw-w64-${{matrix.env}}-cmake + mingw-w64-${{matrix.env}}-make + mingw-w64-${{matrix.env}}-python-pytest + mingw-w64-${{matrix.env}}-eigen3 + mingw-w64-${{matrix.env}}-boost + mingw-w64-${{matrix.env}}-catch + + - uses: actions/checkout@v3 + + - name: Configure C++11 + # LTO leads to many undefined reference like + # `pybind11::detail::function_call::function_call(pybind11::detail::function_call&&) + run: cmake -G "MinGW Makefiles" -DCMAKE_CXX_STANDARD=11 -DDOWNLOAD_CATCH=ON -S . -B build + + - name: Build C++11 + run: cmake --build build -j 2 + + - name: Python tests C++11 + run: cmake --build build --target pytest -j 2 + + - name: C++11 tests + run: PYTHONHOME=/${{matrix.sys}} PYTHONPATH=/${{matrix.sys}} cmake --build build --target cpptest -j 2 + + - name: Interface test C++11 + run: PYTHONHOME=/${{matrix.sys}} PYTHONPATH=/${{matrix.sys}} cmake --build build --target test_cmake_build + + - name: Clean directory + run: git clean -fdx + + - name: Configure C++14 + run: cmake -G "MinGW Makefiles" -DCMAKE_CXX_STANDARD=14 -DDOWNLOAD_CATCH=ON -S . -B build2 + + - name: Build C++14 + run: cmake --build build2 -j 2 + + - name: Python tests C++14 + run: cmake --build build2 --target pytest -j 2 + + - name: C++14 tests + run: PYTHONHOME=/${{matrix.sys}} PYTHONPATH=/${{matrix.sys}} cmake --build build2 --target cpptest -j 2 + + - name: Interface test C++14 + run: PYTHONHOME=/${{matrix.sys}} PYTHONPATH=/${{matrix.sys}} cmake --build build2 --target test_cmake_build + + - name: Clean directory + run: git clean -fdx + + - name: Configure C++17 + run: cmake -G "MinGW Makefiles" -DCMAKE_CXX_STANDARD=17 -DDOWNLOAD_CATCH=ON -S . 
-B build3 + + - name: Build C++17 + run: cmake --build build3 -j 2 + + - name: Python tests C++17 + run: cmake --build build3 --target pytest -j 2 + + - name: C++17 tests + run: PYTHONHOME=/${{matrix.sys}} PYTHONPATH=/${{matrix.sys}} cmake --build build3 --target cpptest -j 2 + + - name: Interface test C++17 + run: PYTHONHOME=/${{matrix.sys}} PYTHONPATH=/${{matrix.sys}} cmake --build build3 --target test_cmake_build diff --git a/third_party/pybind11/.github/workflows/configure.yml b/third_party/pybind11/.github/workflows/configure.yml new file mode 100644 index 0000000000..aa2485ac08 --- /dev/null +++ b/third_party/pybind11/.github/workflows/configure.yml @@ -0,0 +1,80 @@ +name: Config + +on: + workflow_dispatch: + pull_request: + push: + branches: + - master + - stable + - v* + +jobs: + # This tests various versions of CMake in various combinations, to make sure + # the configure step passes. + cmake: + strategy: + fail-fast: false + matrix: + runs-on: [ubuntu-latest, macos-latest, windows-latest] + arch: [x64] + cmake: ["3.23"] + + include: + - runs-on: ubuntu-latest + arch: x64 + cmake: 3.4 + + - runs-on: macos-latest + arch: x64 + cmake: 3.7 + + - runs-on: windows-2019 + arch: x64 # x86 compilers seem to be missing on 2019 image + cmake: 3.18 + + name: 🐍 3.7 • CMake ${{ matrix.cmake }} • ${{ matrix.runs-on }} + runs-on: ${{ matrix.runs-on }} + + steps: + - uses: actions/checkout@v3 + + - name: Setup Python 3.7 + uses: actions/setup-python@v3 + with: + python-version: 3.7 + architecture: ${{ matrix.arch }} + + - name: Prepare env + run: python -m pip install -r tests/requirements.txt + + # An action for adding a specific version of CMake: + # https://github.com/jwlawson/actions-setup-cmake + - name: Setup CMake ${{ matrix.cmake }} + uses: jwlawson/actions-setup-cmake@v1.12 + with: + cmake-version: ${{ matrix.cmake }} + + # These steps use a directory with a space in it intentionally + - name: Make build directories + run: mkdir "build dir" + + - name: Configure + working-directory: build dir + shell: bash + run: > + cmake .. + -DPYBIND11_WERROR=ON + -DDOWNLOAD_CATCH=ON + -DPYTHON_EXECUTABLE=$(python -c "import sys; print(sys.executable)") + + # Only build and test if this was manually triggered in the GitHub UI + - name: Build + working-directory: build dir + if: github.event_name == 'workflow_dispatch' + run: cmake --build . --config Release + + - name: Test + working-directory: build dir + if: github.event_name == 'workflow_dispatch' + run: cmake --build . --config Release --target check diff --git a/third_party/pybind11/.github/workflows/format.yml b/third_party/pybind11/.github/workflows/format.yml new file mode 100644 index 0000000000..5eb228a707 --- /dev/null +++ b/third_party/pybind11/.github/workflows/format.yml @@ -0,0 +1,53 @@ +# This is a format job. 
Pre-commit has a first-party GitHub action, so we use +# that: https://github.com/pre-commit/action + +name: Format + +on: + workflow_dispatch: + pull_request: + push: + branches: + - master + - stable + - "v*" + +env: + FORCE_COLOR: 3 + +jobs: + pre-commit: + name: Format + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v3 + - name: Add matchers + run: echo "::add-matcher::$GITHUB_WORKSPACE/.github/matchers/pylint.json" + - uses: pre-commit/action@v2.0.3 + with: + # Slow hooks are marked with manual - slow is okay here, run them too + extra_args: --hook-stage manual --all-files + + clang-tidy: + # When making changes here, please also review the "Clang-Tidy" section + # in .github/CONTRIBUTING.md and update as needed. + name: Clang-Tidy + runs-on: ubuntu-latest + container: silkeh/clang:12 + steps: + - uses: actions/checkout@v3 + + - name: Install requirements + run: apt-get update && apt-get install -y python3-dev python3-pytest + + - name: Configure + run: > + cmake -S . -B build + -DCMAKE_CXX_CLANG_TIDY="$(which clang-tidy)" + -DDOWNLOAD_EIGEN=ON + -DDOWNLOAD_CATCH=ON + -DCMAKE_CXX_STANDARD=17 + + - name: Build + run: cmake --build build -j 2 -- --keep-going diff --git a/third_party/pybind11/.github/workflows/labeler.yml b/third_party/pybind11/.github/workflows/labeler.yml new file mode 100644 index 0000000000..d2b5979681 --- /dev/null +++ b/third_party/pybind11/.github/workflows/labeler.yml @@ -0,0 +1,16 @@ +name: Labeler +on: + pull_request_target: + types: [closed] + +jobs: + label: + name: Labeler + runs-on: ubuntu-latest + steps: + + - uses: actions/labeler@main + if: github.event.pull_request.merged == true + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + configuration-path: .github/labeler_merged.yml diff --git a/third_party/pybind11/.github/workflows/pip.yml b/third_party/pybind11/.github/workflows/pip.yml new file mode 100644 index 0000000000..1d9a35604e --- /dev/null +++ b/third_party/pybind11/.github/workflows/pip.yml @@ -0,0 +1,108 @@ +name: Pip + +on: + workflow_dispatch: + pull_request: + push: + branches: + - master + - stable + - v* + release: + types: + - published + +env: + PIP_ONLY_BINARY: numpy + +jobs: + # This builds the sdists and wheels and makes sure the files are exactly as + # expected. Using Windows and Python 3.6, since that is often the most + # challenging matrix element. + test-packaging: + name: 🐍 3.6 • 📦 tests • windows-latest + runs-on: windows-latest + + steps: + - uses: actions/checkout@v3 + + - name: Setup 🐍 3.6 + uses: actions/setup-python@v3 + with: + python-version: 3.6 + + - name: Prepare env + run: | + python -m pip install -r tests/requirements.txt + + - name: Python Packaging tests + run: pytest tests/extra_python_package/ + + + # This runs the packaging tests and also builds and saves the packages as + # artifacts. 
+ packaging: + name: 🐍 3.8 • 📦 & 📦 tests • ubuntu-latest + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + + - name: Setup 🐍 3.8 + uses: actions/setup-python@v3 + with: + python-version: 3.8 + + - name: Prepare env + run: | + python -m pip install -r tests/requirements.txt build twine + + - name: Python Packaging tests + run: pytest tests/extra_python_package/ + + - name: Build SDist and wheels + run: | + python -m build + PYBIND11_GLOBAL_SDIST=1 python -m build + + - name: Check metadata + run: twine check dist/* + + - name: Save standard package + uses: actions/upload-artifact@v3 + with: + name: standard + path: dist/pybind11-* + + - name: Save global package + uses: actions/upload-artifact@v3 + with: + name: global + path: dist/pybind11_global-* + + + + # When a GitHub release is made, upload the artifacts to PyPI + upload: + name: Upload to PyPI + runs-on: ubuntu-latest + if: github.event_name == 'release' && github.event.action == 'published' + needs: [packaging] + + steps: + - uses: actions/setup-python@v3 + + # Downloads all to directories matching the artifact names + - uses: actions/download-artifact@v3 + + - name: Publish standard package + uses: pypa/gh-action-pypi-publish@v1.5.0 + with: + password: ${{ secrets.pypi_password }} + packages_dir: standard/ + + - name: Publish global package + uses: pypa/gh-action-pypi-publish@v1.5.0 + with: + password: ${{ secrets.pypi_password_global }} + packages_dir: global/ diff --git a/third_party/pybind11/.github/workflows/upstream.yml b/third_party/pybind11/.github/workflows/upstream.yml new file mode 100644 index 0000000000..174dc24965 --- /dev/null +++ b/third_party/pybind11/.github/workflows/upstream.yml @@ -0,0 +1,112 @@ + +name: Upstream + +on: + workflow_dispatch: + pull_request: + +concurrency: + group: upstream-${{ github.ref }} + cancel-in-progress: true + +env: + PIP_ONLY_BINARY: numpy + +jobs: + standard: + name: "🐍 3.11 dev • ubuntu-latest • x64" + runs-on: ubuntu-latest + if: "contains(github.event.pull_request.labels.*.name, 'python dev')" + + steps: + - uses: actions/checkout@v3 + + - name: Setup Python 3.11 + uses: actions/setup-python@v3 + with: + python-version: "3.11-dev" + + - name: Setup Boost (Linux) + if: runner.os == 'Linux' + run: sudo apt-get install libboost-dev + + - name: Update CMake + uses: jwlawson/actions-setup-cmake@v1.12 + + - name: Prepare env + run: | + python -m pip install -r tests/requirements.txt + + - name: Setup annotations on Linux + if: runner.os == 'Linux' + run: python -m pip install pytest-github-actions-annotate-failures + + # First build - C++11 mode and inplace + - name: Configure C++11 + run: > + cmake -S . -B . + -DPYBIND11_WERROR=ON + -DDOWNLOAD_CATCH=ON + -DDOWNLOAD_EIGEN=ON + -DCMAKE_CXX_STANDARD=11 + + - name: Build C++11 + run: cmake --build . -j 2 + + - name: Python tests C++11 + run: cmake --build . --target pytest -j 2 + + - name: C++11 tests + run: cmake --build . --target cpptest -j 2 + + - name: Interface test C++11 + run: cmake --build . --target test_cmake_build + + - name: Clean directory + run: git clean -fdx + + # Second build - C++17 mode and in a build directory + - name: Configure C++17 + run: > + cmake -S . 
-B build2 + -DPYBIND11_WERROR=ON + -DDOWNLOAD_CATCH=ON + -DDOWNLOAD_EIGEN=ON + -DCMAKE_CXX_STANDARD=17 + ${{ matrix.args }} + ${{ matrix.args2 }} + + - name: Build + run: cmake --build build2 -j 2 + + - name: Python tests + run: cmake --build build2 --target pytest + + - name: C++ tests + run: cmake --build build2 --target cpptest + + # Third build - C++17 mode with unstable ABI + - name: Configure (unstable ABI) + run: > + cmake -S . -B build3 + -DPYBIND11_WERROR=ON + -DDOWNLOAD_CATCH=ON + -DDOWNLOAD_EIGEN=ON + -DCMAKE_CXX_STANDARD=17 + -DPYBIND11_INTERNALS_VERSION=10000000 + "-DPYBIND11_TEST_OVERRIDE=test_call_policies.cpp;test_gil_scoped.cpp;test_thread.cpp" + ${{ matrix.args }} + + - name: Build (unstable ABI) + run: cmake --build build3 -j 2 + + - name: Python tests (unstable ABI) + run: cmake --build build3 --target pytest + + - name: Interface test + run: cmake --build build2 --target test_cmake_build + + # This makes sure the setup_helpers module can build packages using + # setuptools + - name: Setuptools helpers test + run: pytest tests/extra_setuptools diff --git a/third_party/pybind11/.gitignore b/third_party/pybind11/.gitignore new file mode 100644 index 0000000000..3cf4fbbda0 --- /dev/null +++ b/third_party/pybind11/.gitignore @@ -0,0 +1,45 @@ +CMakeCache.txt +CMakeFiles +Makefile +cmake_install.cmake +cmake_uninstall.cmake +.DS_Store +*.so +*.pyd +*.dll +*.sln +*.sdf +*.opensdf +*.vcxproj +*.vcxproj.user +*.filters +example.dir +Win32 +x64 +Release +Debug +.vs +CTestTestfile.cmake +Testing +autogen +MANIFEST +/.ninja_* +/*.ninja +/docs/.build +*.py[co] +*.egg-info +*~ +.*.swp +.DS_Store +/dist +/*build* +.cache/ +sosize-*.txt +pybind11Config*.cmake +pybind11Targets.cmake +/*env* +/.vscode +/pybind11/include/* +/pybind11/share/* +/docs/_build/* +.ipynb_checkpoints/ diff --git a/third_party/pybind11/.pre-commit-config.yaml b/third_party/pybind11/.pre-commit-config.yaml new file mode 100644 index 0000000000..03f829c251 --- /dev/null +++ b/third_party/pybind11/.pre-commit-config.yaml @@ -0,0 +1,170 @@ +# To use: +# +# pre-commit run -a +# +# Or: +# +# pre-commit install # (runs every time you commit in git) +# +# To update this file: +# +# pre-commit autoupdate +# +# See https://github.com/pre-commit/pre-commit + +repos: +# Standard hooks +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: "v4.2.0" + hooks: + - id: check-added-large-files + - id: check-case-conflict + - id: check-docstring-first + - id: check-merge-conflict + - id: check-symlinks + - id: check-toml + - id: check-yaml + - id: debug-statements + - id: end-of-file-fixer + - id: mixed-line-ending + - id: requirements-txt-fixer + - id: trailing-whitespace + +# Upgrade old Python syntax +- repo: https://github.com/asottile/pyupgrade + rev: "v2.32.1" + hooks: + - id: pyupgrade + args: [--py36-plus] + +# Nicely sort includes +- repo: https://github.com/PyCQA/isort + rev: "5.10.1" + hooks: + - id: isort + +# Black, the code formatter, natively supports pre-commit +- repo: https://github.com/psf/black + rev: "22.3.0" # Keep in sync with blacken-docs + hooks: + - id: black + +# Also code format the docs +- repo: https://github.com/asottile/blacken-docs + rev: "v1.12.1" + hooks: + - id: blacken-docs + additional_dependencies: + - black==22.3.0 # keep in sync with black hook + +# Changes tabs to spaces +- repo: https://github.com/Lucas-C/pre-commit-hooks + rev: "v1.1.13" + hooks: + - id: remove-tabs + +- repo: https://github.com/sirosen/texthooks + rev: "0.3.1" + hooks: + - id: fix-ligatures + - id: 
fix-smartquotes + +# Autoremoves unused imports +- repo: https://github.com/hadialqattan/pycln + rev: "v1.3.2" + hooks: + - id: pycln + stages: [manual] + +# Checking for common mistakes +- repo: https://github.com/pre-commit/pygrep-hooks + rev: "v1.9.0" + hooks: + - id: python-check-blanket-noqa + - id: python-check-blanket-type-ignore + - id: python-no-log-warn + - id: python-use-type-annotations + - id: rst-backticks + - id: rst-directive-colons + - id: rst-inline-touching-normal + +# Automatically remove noqa that are not used +- repo: https://github.com/asottile/yesqa + rev: "v1.3.0" + hooks: + - id: yesqa + additional_dependencies: &flake8_dependencies + - flake8-bugbear + - pep8-naming + +# Flake8 also supports pre-commit natively (same author) +- repo: https://github.com/PyCQA/flake8 + rev: "4.0.1" + hooks: + - id: flake8 + exclude: ^(docs/.*|tools/.*)$ + additional_dependencies: *flake8_dependencies + +# PyLint has native support - not always usable, but works for us +- repo: https://github.com/PyCQA/pylint + rev: "v2.13.8" + hooks: + - id: pylint + files: ^pybind11 + +# CMake formatting +- repo: https://github.com/cheshirekow/cmake-format-precommit + rev: "v0.6.13" + hooks: + - id: cmake-format + additional_dependencies: [pyyaml] + types: [file] + files: (\.cmake|CMakeLists.txt)(.in)?$ + +# Check static types with mypy +- repo: https://github.com/pre-commit/mirrors-mypy + rev: "v0.950" + hooks: + - id: mypy + args: [] + exclude: ^(tests|docs)/ + additional_dependencies: [nox, rich] + +# Checks the manifest for missing files (native support) +- repo: https://github.com/mgedmin/check-manifest + rev: "0.48" + hooks: + - id: check-manifest + # This is a slow hook, so only run this if --hook-stage manual is passed + stages: [manual] + additional_dependencies: [cmake, ninja] + +# Check for spelling +- repo: https://github.com/codespell-project/codespell + rev: "v2.1.0" + hooks: + - id: codespell + exclude: ".supp$" + args: ["-L", "nd,ot,thist"] + +# Check for common shell mistakes +- repo: https://github.com/shellcheck-py/shellcheck-py + rev: "v0.8.0.4" + hooks: + - id: shellcheck + +# Disallow some common capitalization mistakes +- repo: local + hooks: + - id: disallow-caps + name: Disallow improper capitalization + language: pygrep + entry: PyBind|Numpy|Cmake|CCache|PyTest + exclude: ^\.pre-commit-config.yaml$ + +# Clang format the codebase automatically +- repo: https://github.com/pre-commit/mirrors-clang-format + rev: "v14.0.3" + hooks: + - id: clang-format + types_or: [c++, c, cuda] diff --git a/third_party/pybind11/.readthedocs.yml b/third_party/pybind11/.readthedocs.yml new file mode 100644 index 0000000000..c9c61617ca --- /dev/null +++ b/third_party/pybind11/.readthedocs.yml @@ -0,0 +1,3 @@ +python: + version: 3 +requirements_file: docs/requirements.txt diff --git a/third_party/pybind11/CMakeLists.txt b/third_party/pybind11/CMakeLists.txt new file mode 100644 index 0000000000..3787982cbd --- /dev/null +++ b/third_party/pybind11/CMakeLists.txt @@ -0,0 +1,299 @@ +# CMakeLists.txt -- Build system for the pybind11 modules +# +# Copyright (c) 2015 Wenzel Jakob +# +# All rights reserved. Use of this source code is governed by a +# BSD-style license that can be found in the LICENSE file. + +cmake_minimum_required(VERSION 3.4) + +# The `cmake_minimum_required(VERSION 3.4...3.22)` syntax does not work with +# some versions of VS that have a patched CMake 3.11. 
This forces us to emulate +# the behavior using the following workaround: +if(${CMAKE_VERSION} VERSION_LESS 3.22) + cmake_policy(VERSION ${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}) +else() + cmake_policy(VERSION 3.22) +endif() + +# Avoid infinite recursion if tests include this as a subdirectory +if(DEFINED PYBIND11_MASTER_PROJECT) + return() +endif() + +# Extract project version from source +file(STRINGS "${CMAKE_CURRENT_SOURCE_DIR}/include/pybind11/detail/common.h" + pybind11_version_defines REGEX "#define PYBIND11_VERSION_(MAJOR|MINOR|PATCH) ") + +foreach(ver ${pybind11_version_defines}) + if(ver MATCHES [[#define PYBIND11_VERSION_(MAJOR|MINOR|PATCH) +([^ ]+)$]]) + set(PYBIND11_VERSION_${CMAKE_MATCH_1} "${CMAKE_MATCH_2}") + endif() +endforeach() + +if(PYBIND11_VERSION_PATCH MATCHES [[\.([a-zA-Z0-9]+)$]]) + set(pybind11_VERSION_TYPE "${CMAKE_MATCH_1}") +endif() +string(REGEX MATCH "^[0-9]+" PYBIND11_VERSION_PATCH "${PYBIND11_VERSION_PATCH}") + +project( + pybind11 + LANGUAGES CXX + VERSION "${PYBIND11_VERSION_MAJOR}.${PYBIND11_VERSION_MINOR}.${PYBIND11_VERSION_PATCH}") + +# Standard includes +include(GNUInstallDirs) +include(CMakePackageConfigHelpers) +include(CMakeDependentOption) + +if(NOT pybind11_FIND_QUIETLY) + message(STATUS "pybind11 v${pybind11_VERSION} ${pybind11_VERSION_TYPE}") +endif() + +# Check if pybind11 is being used directly or via add_subdirectory +if(CMAKE_SOURCE_DIR STREQUAL PROJECT_SOURCE_DIR) + ### Warn if this is not an out-of-source build + if(CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_CURRENT_BINARY_DIR) + set(lines + "You are building in-place. If that is not what you intended to " + "do, you can clean the source directory with:\n" + "rm -r CMakeCache.txt CMakeFiles/ cmake_uninstall.cmake pybind11Config.cmake " + "pybind11ConfigVersion.cmake tests/CMakeFiles/\n") + message(AUTHOR_WARNING ${lines}) + endif() + + set(PYBIND11_MASTER_PROJECT ON) + + if(OSX AND CMAKE_VERSION VERSION_LESS 3.7) + # A bug in CMake < 3.7 on macOS prevents it from downloading catch + message(WARNING "CMAKE 3.7+ needed on macOS to download catch, and newer HIGHLY recommended") + elseif(WINDOWS AND CMAKE_VERSION VERSION_LESS 3.8) + # Only tested with 3.8+ in CI. + message(WARNING "CMAKE 3.8+ tested on Windows, previous versions untested") + endif() + + message(STATUS "CMake ${CMAKE_VERSION}") + + if(CMAKE_CXX_STANDARD) + set(CMAKE_CXX_EXTENSIONS OFF) + set(CMAKE_CXX_STANDARD_REQUIRED ON) + endif() + + set(pybind11_system "") + + set_property(GLOBAL PROPERTY USE_FOLDERS ON) +else() + set(PYBIND11_MASTER_PROJECT OFF) + set(pybind11_system SYSTEM) +endif() + +# Options +option(PYBIND11_INSTALL "Install pybind11 header files?" ${PYBIND11_MASTER_PROJECT}) +option(PYBIND11_TEST "Build pybind11 test suite?"
${PYBIND11_MASTER_PROJECT}) +option(PYBIND11_NOPYTHON "Disable search for Python" OFF) +set(PYBIND11_INTERNALS_VERSION + "" + CACHE STRING "Override the ABI version, may be used to enable the unstable ABI.") + +cmake_dependent_option( + USE_PYTHON_INCLUDE_DIR + "Install pybind11 headers in Python include directory instead of default installation prefix" + OFF "PYBIND11_INSTALL" OFF) + +cmake_dependent_option(PYBIND11_FINDPYTHON "Force new FindPython" OFF + "NOT CMAKE_VERSION VERSION_LESS 3.12" OFF) + +# NB: when adding a header don't forget to also add it to setup.py +set(PYBIND11_HEADERS + include/pybind11/detail/class.h + include/pybind11/detail/common.h + include/pybind11/detail/descr.h + include/pybind11/detail/init.h + include/pybind11/detail/internals.h + include/pybind11/detail/type_caster_base.h + include/pybind11/detail/typeid.h + include/pybind11/attr.h + include/pybind11/buffer_info.h + include/pybind11/cast.h + include/pybind11/chrono.h + include/pybind11/common.h + include/pybind11/complex.h + include/pybind11/options.h + include/pybind11/eigen.h + include/pybind11/embed.h + include/pybind11/eval.h + include/pybind11/gil.h + include/pybind11/iostream.h + include/pybind11/functional.h + include/pybind11/numpy.h + include/pybind11/operators.h + include/pybind11/pybind11.h + include/pybind11/pytypes.h + include/pybind11/stl.h + include/pybind11/stl_bind.h + include/pybind11/stl/filesystem.h) + +# Compare with grep and warn if mismatched +if(PYBIND11_MASTER_PROJECT AND NOT CMAKE_VERSION VERSION_LESS 3.12) + file( + GLOB_RECURSE _pybind11_header_check + LIST_DIRECTORIES false + RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" + CONFIGURE_DEPENDS "include/pybind11/*.h") + set(_pybind11_here_only ${PYBIND11_HEADERS}) + set(_pybind11_disk_only ${_pybind11_header_check}) + list(REMOVE_ITEM _pybind11_here_only ${_pybind11_header_check}) + list(REMOVE_ITEM _pybind11_disk_only ${PYBIND11_HEADERS}) + if(_pybind11_here_only) + message(AUTHOR_WARNING "PYBIND11_HEADERS has extra files:" ${_pybind11_here_only}) + endif() + if(_pybind11_disk_only) + message(AUTHOR_WARNING "PYBIND11_HEADERS is missing files:" ${_pybind11_disk_only}) + endif() +endif() + +# CMake 3.12 added list(TRANSFORM PREPEND +# But we can't use it yet +string(REPLACE "include/" "${CMAKE_CURRENT_SOURCE_DIR}/include/" PYBIND11_HEADERS + "${PYBIND11_HEADERS}") + +# Cache variable so this can be used in parent projects +set(pybind11_INCLUDE_DIR + "${CMAKE_CURRENT_LIST_DIR}/include" + CACHE INTERNAL "Directory where pybind11 headers are located") + +# Backward compatible variable for add_subdirectory mode +if(NOT PYBIND11_MASTER_PROJECT) + set(PYBIND11_INCLUDE_DIR + "${pybind11_INCLUDE_DIR}" + CACHE INTERNAL "") +endif() + +# Note: when creating targets, you cannot use if statements at configure time - +# you need generator expressions, because those will be placed in the target file. +# You can also place ifs *in* the Config.in, but not here. 
+ +# This section builds targets, but does *not* touch Python +# Non-IMPORT targets cannot be defined twice +if(NOT TARGET pybind11_headers) + # Build the headers-only target (no Python included): + # (long name used here to keep this from clashing in subdirectory mode) + add_library(pybind11_headers INTERFACE) + add_library(pybind11::pybind11_headers ALIAS pybind11_headers) # to match exported target + add_library(pybind11::headers ALIAS pybind11_headers) # easier to use/remember + + target_include_directories( + pybind11_headers ${pybind11_system} INTERFACE $<BUILD_INTERFACE:${pybind11_INCLUDE_DIR}> + $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>) + + target_compile_features(pybind11_headers INTERFACE cxx_inheriting_constructors cxx_user_literals + cxx_right_angle_brackets) + if(NOT "${PYBIND11_INTERNALS_VERSION}" STREQUAL "") + target_compile_definitions( + pybind11_headers INTERFACE "PYBIND11_INTERNALS_VERSION=${PYBIND11_INTERNALS_VERSION}") + endif() +else() + # It is invalid to install a target twice, too. + set(PYBIND11_INSTALL OFF) +endif() + +include("${CMAKE_CURRENT_SOURCE_DIR}/tools/pybind11Common.cmake") + +# Relative directory setting +if(USE_PYTHON_INCLUDE_DIR AND DEFINED Python_INCLUDE_DIRS) + file(RELATIVE_PATH CMAKE_INSTALL_INCLUDEDIR ${CMAKE_INSTALL_PREFIX} ${Python_INCLUDE_DIRS}) +elseif(USE_PYTHON_INCLUDE_DIR AND DEFINED PYTHON_INCLUDE_DIR) + file(RELATIVE_PATH CMAKE_INSTALL_INCLUDEDIR ${CMAKE_INSTALL_PREFIX} ${PYTHON_INCLUDE_DIRS}) +endif() + +if(PYBIND11_INSTALL) + install(DIRECTORY ${pybind11_INCLUDE_DIR}/pybind11 DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) + set(PYBIND11_CMAKECONFIG_INSTALL_DIR + "${CMAKE_INSTALL_DATAROOTDIR}/cmake/${PROJECT_NAME}" + CACHE STRING "install path for pybind11Config.cmake") + + if(IS_ABSOLUTE "${CMAKE_INSTALL_INCLUDEDIR}") + set(pybind11_INCLUDEDIR "${CMAKE_INSTALL_FULL_INCLUDEDIR}") + else() + set(pybind11_INCLUDEDIR "\$\{PACKAGE_PREFIX_DIR\}/${CMAKE_INSTALL_INCLUDEDIR}") + endif() + + configure_package_config_file( + tools/${PROJECT_NAME}Config.cmake.in "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Config.cmake" + INSTALL_DESTINATION ${PYBIND11_CMAKECONFIG_INSTALL_DIR}) + + if(CMAKE_VERSION VERSION_LESS 3.14) + # Remove CMAKE_SIZEOF_VOID_P from ConfigVersion.cmake since the library does + # not depend on architecture specific settings or libraries.
+ set(_PYBIND11_CMAKE_SIZEOF_VOID_P ${CMAKE_SIZEOF_VOID_P}) + unset(CMAKE_SIZEOF_VOID_P) + + write_basic_package_version_file( + ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake + VERSION ${PROJECT_VERSION} + COMPATIBILITY AnyNewerVersion) + + set(CMAKE_SIZEOF_VOID_P ${_PYBIND11_CMAKE_SIZEOF_VOID_P}) + else() + # CMake 3.14+ natively supports header-only libraries + write_basic_package_version_file( + ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake + VERSION ${PROJECT_VERSION} + COMPATIBILITY AnyNewerVersion ARCH_INDEPENDENT) + endif() + + install( + FILES ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Config.cmake + ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake + tools/FindPythonLibsNew.cmake + tools/pybind11Common.cmake + tools/pybind11Tools.cmake + tools/pybind11NewTools.cmake + DESTINATION ${PYBIND11_CMAKECONFIG_INSTALL_DIR}) + + if(NOT PYBIND11_EXPORT_NAME) + set(PYBIND11_EXPORT_NAME "${PROJECT_NAME}Targets") + endif() + + install(TARGETS pybind11_headers EXPORT "${PYBIND11_EXPORT_NAME}") + + install( + EXPORT "${PYBIND11_EXPORT_NAME}" + NAMESPACE "pybind11::" + DESTINATION ${PYBIND11_CMAKECONFIG_INSTALL_DIR}) + + # Uninstall target + if(PYBIND11_MASTER_PROJECT) + configure_file("${CMAKE_CURRENT_SOURCE_DIR}/tools/cmake_uninstall.cmake.in" + "${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake" IMMEDIATE @ONLY) + + add_custom_target(uninstall COMMAND ${CMAKE_COMMAND} -P + ${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake) + endif() +endif() + +# BUILD_TESTING takes priority, but only if this is the master project +if(PYBIND11_MASTER_PROJECT AND DEFINED BUILD_TESTING) + if(BUILD_TESTING) + if(_pybind11_nopython) + message(FATAL_ERROR "Cannot activate tests in NOPYTHON mode") + else() + add_subdirectory(tests) + endif() + endif() +else() + if(PYBIND11_TEST) + if(_pybind11_nopython) + message(FATAL_ERROR "Cannot activate tests in NOPYTHON mode") + else() + add_subdirectory(tests) + endif() + endif() +endif() + +# Better symmetry with find_package(pybind11 CONFIG) mode. +if(NOT PYBIND11_MASTER_PROJECT) + set(pybind11_FOUND + TRUE + CACHE INTERNAL "True if pybind11 and all required components found on the system") +endif() diff --git a/third_party/pybind11/LICENSE b/third_party/pybind11/LICENSE new file mode 100644 index 0000000000..e466b0dfda --- /dev/null +++ b/third_party/pybind11/LICENSE @@ -0,0 +1,29 @@ +Copyright (c) 2016 Wenzel Jakob , All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Please also refer to the file .github/CONTRIBUTING.md, which clarifies licensing of +external contributions to this project including patches, pull requests, etc. diff --git a/third_party/pybind11/MANIFEST.in b/third_party/pybind11/MANIFEST.in new file mode 100644 index 0000000000..033303a74a --- /dev/null +++ b/third_party/pybind11/MANIFEST.in @@ -0,0 +1,5 @@ +recursive-include pybind11/include/pybind11 *.h +recursive-include pybind11 *.py +recursive-include pybind11 py.typed +include pybind11/share/cmake/pybind11/*.cmake +include LICENSE README.rst pyproject.toml setup.py setup.cfg diff --git a/third_party/pybind11/README.rst b/third_party/pybind11/README.rst new file mode 100644 index 0000000000..3c75edb575 --- /dev/null +++ b/third_party/pybind11/README.rst @@ -0,0 +1,180 @@ +.. figure:: https://github.com/pybind/pybind11/raw/master/docs/pybind11-logo.png + :alt: pybind11 logo + +**pybind11 — Seamless operability between C++11 and Python** + +|Latest Documentation Status| |Stable Documentation Status| |Gitter chat| |GitHub Discussions| |CI| |Build status| + +|Repology| |PyPI package| |Conda-forge| |Python Versions| + +`Setuptools example `_ +• `Scikit-build example `_ +• `CMake example `_ + +.. start + + +**pybind11** is a lightweight header-only library that exposes C++ types +in Python and vice versa, mainly to create Python bindings of existing +C++ code. Its goals and syntax are similar to the excellent +`Boost.Python `_ +library by David Abrahams: to minimize boilerplate code in traditional +extension modules by inferring type information using compile-time +introspection. + +The main issue with Boost.Python—and the reason for creating such a +similar project—is Boost. Boost is an enormously large and complex suite +of utility libraries that works with almost every C++ compiler in +existence. This compatibility has its cost: arcane template tricks and +workarounds are necessary to support the oldest and buggiest of compiler +specimens. Now that C++11-compatible compilers are widely available, +this heavy machinery has become an excessively large and unnecessary +dependency. + +Think of this library as a tiny self-contained version of Boost.Python +with everything stripped away that isn't relevant for binding +generation. Without comments, the core header files only require ~4K +lines of code and depend on Python (3.6+, or PyPy) and the C++ +standard library. This compact implementation was possible thanks to +some of the new C++11 language features (specifically: tuples, lambda +functions and variadic templates). Since its creation, this library has +grown beyond Boost.Python in many ways, leading to dramatically simpler +binding code in many common situations. + +Tutorial and reference documentation is provided at +`pybind11.readthedocs.io `_. +A PDF version of the manual is available +`here `_. +And the source code is always available at +`github.com/pybind/pybind11 `_. 
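To make the "minimize boilerplate" claim concrete, here is a minimal, illustrative binding module (the module name ``example`` and the ``add`` function are placeholders invented for this sketch, not part of the pybind11 API):

.. code-block:: cpp

    #include <pybind11/pybind11.h>

    // An ordinary C++ function we would like to call from Python.
    int add(int i, int j) { return i + j; }

    // PYBIND11_MODULE defines the module entry point; the first argument
    // ("example") must match the name of the compiled extension module.
    PYBIND11_MODULE(example, m) {
        m.doc() = "minimal example module";  // optional module docstring
        m.def("add", &add, "Add two integers");
    }

Once compiled into an extension, ``import example; example.add(1, 2)`` returns ``3`` on the Python side.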
+ + +Core features +------------- + + +pybind11 can map the following core C++ features to Python: + +- Functions accepting and returning custom data structures per value, + reference, or pointer +- Instance methods and static methods +- Overloaded functions +- Instance attributes and static attributes +- Arbitrary exception types +- Enumerations +- Callbacks +- Iterators and ranges +- Custom operators +- Single and multiple inheritance +- STL data structures +- Smart pointers with reference counting like ``std::shared_ptr`` +- Internal references with correct reference counting +- C++ classes with virtual (and pure virtual) methods can be extended + in Python + +Goodies +------- + +In addition to the core functionality, pybind11 provides some extra +goodies: + +- Python 3.6+ and PyPy3 7.3 are supported with an implementation-agnostic + interface (pybind11 2.9 was the last version to support Python 2 and 3.5). + +- It is possible to bind C++11 lambda functions with captured + variables. The lambda capture data is stored inside the resulting + Python function object. + +- pybind11 uses C++11 move constructors and move assignment operators + whenever possible to efficiently transfer custom data types. + +- It's easy to expose the internal storage of custom data types through + Python's buffer protocols. This is handy e.g. for fast conversion + between C++ matrix classes like Eigen and NumPy without expensive + copy operations. + +- pybind11 can automatically vectorize functions so that they are + transparently applied to all entries of one or more NumPy array + arguments. + +- Python's slice-based access and assignment operations can be + supported with just a few lines of code. + +- Everything is contained in just a few header files; there is no need + to link against any additional libraries. + +- Binaries are generally smaller by a factor of at least 2 compared to + equivalent bindings generated by Boost.Python. A recent pybind11 + conversion of PyRosetta, an enormous Boost.Python binding project, + `reported `_ + a binary size reduction of **5.4x** and a compile time reduction of + **5.8x**. + +- Function signatures are precomputed at compile time (using + ``constexpr``), leading to smaller binaries. + +- With little extra effort, C++ types can be pickled and unpickled + similar to regular Python objects. + +Supported compilers +------------------- + +1. Clang/LLVM 3.3 or newer (for Apple Xcode's clang, this is 5.0.0 or + newer) +2. GCC 4.8 or newer +3. Microsoft Visual Studio 2017 or newer +4. Intel classic C++ compiler 18 or newer (ICC 20.2 tested in CI) +5. Cygwin/GCC (previously tested on 2.5.1) +6. NVCC (CUDA 11.0 tested in CI) +7. NVIDIA PGI (20.9 tested in CI) + +About +----- + +This project was created by `Wenzel +Jakob `_. Significant features and/or +improvements to the code were contributed by Jonas Adler, Lori A. Burns, +Sylvain Corlay, Eric Cousineau, Aaron Gokaslan, Ralf Grosse-Kunstleve, Trent Houliston, Axel +Huebl, @hulucc, Yannick Jadoul, Sergey Lyskov, Johan Mabille, Tomasz Miąsko, +Dean Moldovan, Ben Pritchard, Jason Rhinelander, Boris Schäling, Pim +Schellart, Henry Schreiner, Ivan Smirnov, Boris Staletic, and Patrick Stewart. + +We thank Google for a generous financial contribution to the continuous +integration infrastructure used by this project. + + +Contributing +~~~~~~~~~~~~ + +See the `contributing +guide `_ +for information on building and contributing to pybind11.
+ +License +~~~~~~~ + +pybind11 is provided under a BSD-style license that can be found in the +`LICENSE `_ +file. By using, distributing, or contributing to this project, you agree +to the terms and conditions of this license. + +.. |Latest Documentation Status| image:: https://readthedocs.org/projects/pybind11/badge?version=latest + :target: http://pybind11.readthedocs.org/en/latest +.. |Stable Documentation Status| image:: https://img.shields.io/badge/docs-stable-blue.svg + :target: http://pybind11.readthedocs.org/en/stable +.. |Gitter chat| image:: https://img.shields.io/gitter/room/gitterHQ/gitter.svg + :target: https://gitter.im/pybind/Lobby +.. |CI| image:: https://github.com/pybind/pybind11/workflows/CI/badge.svg + :target: https://github.com/pybind/pybind11/actions +.. |Build status| image:: https://ci.appveyor.com/api/projects/status/riaj54pn4h08xy40?svg=true + :target: https://ci.appveyor.com/project/wjakob/pybind11 +.. |PyPI package| image:: https://img.shields.io/pypi/v/pybind11.svg + :target: https://pypi.org/project/pybind11/ +.. |Conda-forge| image:: https://img.shields.io/conda/vn/conda-forge/pybind11.svg + :target: https://github.com/conda-forge/pybind11-feedstock +.. |Repology| image:: https://repology.org/badge/latest-versions/python:pybind11.svg + :target: https://repology.org/project/python:pybind11/versions +.. |Python Versions| image:: https://img.shields.io/pypi/pyversions/pybind11.svg + :target: https://pypi.org/project/pybind11/ +.. |GitHub Discussions| image:: https://img.shields.io/static/v1?label=Discussions&message=Ask&color=blue&logo=github + :target: https://github.com/pybind/pybind11/discussions diff --git a/third_party/pybind11/docs/Doxyfile b/third_party/pybind11/docs/Doxyfile new file mode 100644 index 0000000000..09138db364 --- /dev/null +++ b/third_party/pybind11/docs/Doxyfile @@ -0,0 +1,21 @@ +PROJECT_NAME = pybind11 +INPUT = ../include/pybind11/ +RECURSIVE = YES + +GENERATE_HTML = NO +GENERATE_LATEX = NO +GENERATE_XML = YES +XML_OUTPUT = .build/doxygenxml +XML_PROGRAMLISTING = YES + +MACRO_EXPANSION = YES +EXPAND_ONLY_PREDEF = YES +EXPAND_AS_DEFINED = PYBIND11_RUNTIME_EXCEPTION + +ALIASES = "rst=\verbatim embed:rst" +ALIASES += "endrst=\endverbatim" + +QUIET = YES +WARNINGS = YES +WARN_IF_UNDOCUMENTED = NO +PREDEFINED = PYBIND11_NOINLINE diff --git a/third_party/pybind11/docs/_static/theme_overrides.css b/third_party/pybind11/docs/_static/theme_overrides.css new file mode 100644 index 0000000000..1071809fa0 --- /dev/null +++ b/third_party/pybind11/docs/_static/theme_overrides.css @@ -0,0 +1,11 @@ +.wy-table-responsive table td, +.wy-table-responsive table th { + white-space: initial !important; +} +.rst-content table.docutils td { + vertical-align: top !important; +} +div[class^='highlight'] pre { + white-space: pre; + white-space: pre-wrap; +} diff --git a/third_party/pybind11/docs/advanced/cast/chrono.rst b/third_party/pybind11/docs/advanced/cast/chrono.rst new file mode 100644 index 0000000000..fbd46057aa --- /dev/null +++ b/third_party/pybind11/docs/advanced/cast/chrono.rst @@ -0,0 +1,81 @@ +Chrono +====== + +When including the additional header file :file:`pybind11/chrono.h` conversions +from C++11 chrono datatypes to python datetime objects are automatically enabled. +This header also enables conversions of python floats (often from sources such +as ``time.monotonic()``, ``time.perf_counter()`` and ``time.process_time()``) +into durations. 
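As a quick sketch of what this enables (the module and function names below are invented for illustration):

.. code-block:: cpp

    #include <pybind11/pybind11.h>
    #include <pybind11/chrono.h>  // enables the chrono <-> datetime conversions

    #include <chrono>
    #include <thread>

    PYBIND11_MODULE(clocks, m) {
        // Returned to Python as a naive datetime.datetime in the local timezone.
        m.def("now", []() { return std::chrono::system_clock::now(); });

        // Callable with a datetime.timedelta, or with a plain float that is
        // interpreted as a number of seconds (e.g. a difference between two
        // time.monotonic() readings).
        m.def("sleep_for", [](std::chrono::duration<double> d) {
            std::this_thread::sleep_for(d);
        });
    }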
+ +An overview of clocks in C++11 +------------------------------ + +A point of confusion when using these conversions is the difference between +clocks provided in C++11. There are three clock types defined by the C++11 +standard, and users can define their own if needed. Each of these clocks has +different properties, and converting to and from python will give different +results depending on the clock. + +The first clock defined by the standard is ``std::chrono::system_clock``. This +clock measures the current date and time. However, this clock changes with +updates to the operating system time. For example, if your time is synchronised +with a time server, this clock will change. This makes this clock a poor choice +for timing purposes, but a good one for measuring wall time. + +The second clock defined in the standard is ``std::chrono::steady_clock``. +This clock ticks at a steady rate and is never adjusted. This makes it excellent +for timing purposes; however, the value in this clock does not correspond to the +current date and time. Often this clock is the amount of time your system +has been on, although it does not have to be. This clock will never be the same +as the system clock, because the system clock can change but steady clocks +cannot. + +The third clock defined in the standard is ``std::chrono::high_resolution_clock``. +This clock is the one with the highest resolution out of the clocks in the +system. It is normally a typedef to either the system clock or the steady clock, +but can be its own independent clock. This matters for these conversions because +the types you get in python for this clock might differ depending on the system. +If it is a typedef of the system clock, python will get datetime objects, but if +it is a different clock they will be timedelta objects. + +Provided conversions +-------------------- + +.. rubric:: C++ to Python + +- ``std::chrono::system_clock::time_point`` → ``datetime.datetime`` + System clock times are converted to python datetime instances. They are + in the local timezone, but do not have any timezone information attached + to them (they are naive datetime objects). + +- ``std::chrono::duration`` → ``datetime.timedelta`` + Durations are converted to timedeltas; any precision in the duration + greater than microseconds is lost by rounding towards zero. + +- ``std::chrono::[other_clocks]::time_point`` → ``datetime.timedelta`` + Any clock time that is not the system clock is converted to a time delta. + This timedelta measures the time from the clock's epoch to now. + +.. rubric:: Python to C++ + +- ``datetime.datetime`` or ``datetime.date`` or ``datetime.time`` → ``std::chrono::system_clock::time_point`` + Date/time objects are converted into system clock timepoints. Any + timezone information is ignored and the type is treated as a naive + object. + +- ``datetime.timedelta`` → ``std::chrono::duration`` + Time deltas are converted into durations with microsecond precision. + +- ``datetime.timedelta`` → ``std::chrono::[other_clocks]::time_point`` + Time deltas that are converted into clock timepoints are treated as + the amount of time from the start of the clock's epoch. + +- ``float`` → ``std::chrono::duration`` + Floats that are passed to C++ as durations are interpreted as a number of + seconds. These are converted to the duration using ``duration_cast`` + from the float.
+ +- ``float`` → ``std::chrono::[other_clocks]::time_point`` + Floats that are passed to C++ as time points will be interpreted as the + number of seconds from the start of the clock's epoch. diff --git a/third_party/pybind11/docs/advanced/cast/custom.rst b/third_party/pybind11/docs/advanced/cast/custom.rst new file mode 100644 index 0000000000..1df4d3e14b --- /dev/null +++ b/third_party/pybind11/docs/advanced/cast/custom.rst @@ -0,0 +1,93 @@ +Custom type casters +=================== + +In very rare cases, applications may require custom type casters that cannot be +expressed using the abstractions provided by pybind11, thus requiring raw +Python C API calls. This is fairly advanced usage and should only be pursued by +experts who are familiar with the intricacies of Python reference counting. + +The following snippets demonstrate how this works for a very simple ``inty`` +type that should be convertible from Python types that provide a +``__int__(self)`` method. + +.. code-block:: cpp + + struct inty { long long_value; }; + + void print(inty s) { + std::cout << s.long_value << std::endl; + } + +The following Python snippet demonstrates the intended usage from the Python side: + +.. code-block:: python + + class A: + def __int__(self): + return 123 + + + from example import print + + print(A()) + +To register the necessary conversion routines, it is necessary to add an +instantiation of the ``pybind11::detail::type_caster<T>`` template. +Although this is an implementation detail, adding an instantiation of this +type is explicitly allowed. + +.. code-block:: cpp + + namespace pybind11 { namespace detail { + template <> struct type_caster<inty> { + public: + /** + * This macro establishes the name 'inty' in + * function signatures and declares a local variable + * 'value' of type inty + */ + PYBIND11_TYPE_CASTER(inty, const_name("inty")); + + /** + * Conversion part 1 (Python->C++): convert a PyObject into an inty + * instance or return false upon failure. The second argument + * indicates whether implicit conversions should be applied. + */ + bool load(handle src, bool) { + /* Extract PyObject from handle */ + PyObject *source = src.ptr(); + /* Try converting into a Python integer value */ + PyObject *tmp = PyNumber_Long(source); + if (!tmp) + return false; + /* Now try to convert into a C++ int */ + value.long_value = PyLong_AsLong(tmp); + Py_DECREF(tmp); + /* Ensure return code was OK (to avoid out-of-range errors etc) */ + return !(value.long_value == -1 && !PyErr_Occurred()); + } + + /** + * Conversion part 2 (C++ -> Python): convert an inty instance into + * a Python object. The second and third arguments are used to + * indicate the return value policy and parent object (for + * ``return_value_policy::reference_internal``) and are generally + * ignored by implicit casters. + */ + static handle cast(inty src, return_value_policy /* policy */, handle /* parent */) { + return PyLong_FromLong(src.long_value); + } + }; + }} // namespace pybind11::detail + +.. note:: + + A ``type_caster`` defined with ``PYBIND11_TYPE_CASTER(T, ...)`` requires + that ``T`` is default-constructible (``value`` is first default constructed + and then ``load()`` assigns to it). + +.. warning:: + + When using custom type casters, it's important to declare them consistently + in every compilation unit of the Python extension module. Otherwise, + undefined behavior can ensue.
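For completeness, a short sketch of the module definition that ties the snippets above together (the module name ``example`` is assumed here so that it matches the earlier Python snippet; pybind11 does not prescribe it):

.. code-block:: cpp

    #include <pybind11/pybind11.h>

    // struct inty, print(), and the type_caster<inty> specialization are
    // assumed to be defined (or included) as in the snippets above.

    PYBIND11_MODULE(example, m) {
        // The custom caster is picked up automatically: any Python object
        // providing __int__ can now be passed where an inty is expected.
        m.def("print", &print);
    }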
diff --git a/third_party/pybind11/docs/advanced/cast/eigen.rst b/third_party/pybind11/docs/advanced/cast/eigen.rst new file mode 100644 index 0000000000..a5c11a3f14 --- /dev/null +++ b/third_party/pybind11/docs/advanced/cast/eigen.rst @@ -0,0 +1,310 @@ +Eigen +##### + +`Eigen `_ is a C++ header-based library for dense and +sparse linear algebra. Due to its popularity and widespread adoption, pybind11 +provides transparent conversion and limited mapping support between Eigen and +Scientific Python linear algebra data types. + +To enable the built-in Eigen support you must include the optional header file +:file:`pybind11/eigen.h`. + +Pass-by-value +============= + +When binding a function with ordinary Eigen dense object arguments (for +example, ``Eigen::MatrixXd``), pybind11 will accept any input value that is +already (or convertible to) a ``numpy.ndarray`` with dimensions compatible with +the Eigen type, copy its values into a temporary Eigen variable of the +appropriate type, then call the function with this temporary variable. + +Sparse matrices are similarly copied to or from +``scipy.sparse.csr_matrix``/``scipy.sparse.csc_matrix`` objects. + +Pass-by-reference +================= + +One major limitation of the above is that every data conversion implicitly +involves a copy, which can be expensive (for large matrices) and makes it +impossible to bind functions that change their (Matrix) arguments. Pybind11 +allows you to work around this by using Eigen's ``Eigen::Ref<MatrixType>`` +class much as you would when writing a function taking a generic type in +Eigen itself (subject to some limitations discussed below). + +When calling a bound function accepting an ``Eigen::Ref<const MatrixType>`` +type, pybind11 will attempt to avoid copying by using an ``Eigen::Map`` object +that maps into the source ``numpy.ndarray`` data: this requires both that the +data types are the same (e.g. ``dtype='float64'`` and ``MatrixType::Scalar`` is +``double``); and that the storage is layout compatible. The latter limitation +is discussed in detail in the section below, and requires careful +consideration: by default, numpy matrices and Eigen matrices are *not* storage +compatible. + +If the numpy matrix cannot be used as is (either because its types differ, e.g. +passing an array of integers to an Eigen parameter requiring doubles, or +because the storage is incompatible), pybind11 makes a temporary copy and +passes the copy instead. + +When a bound function parameter is instead ``Eigen::Ref<MatrixType>`` (note the +lack of ``const``), pybind11 will only allow the function to be called if it +can be mapped *and* if the numpy array is writeable (that is +``a.flags.writeable`` is true). Any access (including modification) made to +the passed variable will be transparently carried out directly on the +``numpy.ndarray``. + +This means you can write code such as the following and have it work as +expected: + +.. code-block:: cpp + + void scale_by_2(Eigen::Ref<Eigen::VectorXd> v) { + v *= 2; + } + +Note, however, that you will likely run into limitations due to the differing +default storage orders of numpy and Eigen; see the below section on +:ref:`storage_orders` for details on how to bind code that won't run into such +limitations. + +.. note:: + + Passing by reference is not supported for sparse types. + +Returning values to Python +========================== + +When returning an ordinary dense Eigen matrix type to numpy (e.g.
+``Eigen::MatrixXd`` or ``Eigen::RowVectorXf``) pybind11 keeps the matrix and +returns a numpy array that directly references the Eigen matrix: no copy of the +data is performed. The numpy array will have ``array.flags.owndata`` set to +``False`` to indicate that it does not own the data, and the lifetime of the +stored Eigen matrix will be tied to the returned ``array``. + +If you bind a function with a non-reference, ``const`` return type (e.g. +``const Eigen::MatrixXd``), the same thing happens except that pybind11 also +sets the numpy array's ``writeable`` flag to false. + +If you return an lvalue reference or pointer, the usual pybind11 rules apply, +as dictated by the binding function's return value policy (see the +documentation on :ref:`return_value_policies` for full details). That means, +without an explicit return value policy, lvalue references will be copied and +pointers will be managed by pybind11. In order to avoid copying, you should +explicitly specify an appropriate return value policy, as in the following +example: + +.. code-block:: cpp + + class MyClass { + Eigen::MatrixXd big_mat = Eigen::MatrixXd::Zero(10000, 10000); + public: + Eigen::MatrixXd &getMatrix() { return big_mat; } + const Eigen::MatrixXd &viewMatrix() { return big_mat; } + }; + + // Later, in binding code: + py::class_<MyClass>(m, "MyClass") + .def(py::init<>()) + .def("copy_matrix", &MyClass::getMatrix) // Makes a copy! + .def("get_matrix", &MyClass::getMatrix, py::return_value_policy::reference_internal) + .def("view_matrix", &MyClass::viewMatrix, py::return_value_policy::reference_internal) + ; + +.. code-block:: python + + a = MyClass() + m = a.get_matrix() # flags.writeable = True, flags.owndata = False + v = a.view_matrix() # flags.writeable = False, flags.owndata = False + c = a.copy_matrix() # flags.writeable = True, flags.owndata = True + # m[5,6] and v[5,6] refer to the same element, c[5,6] does not. + +Note in this example that ``py::return_value_policy::reference_internal`` is +used to tie the life of the MyClass object to the life of the returned arrays. + +You may also return an ``Eigen::Ref``, ``Eigen::Map`` or other map-like Eigen +object (for example, the return value of ``matrix.block()`` and related +methods) that map into a dense Eigen type. When doing so, the default +behaviour of pybind11 is to simply reference the returned data: you must take +care to ensure that this data remains valid! You may ask pybind11 to +explicitly *copy* such a return value by using the +``py::return_value_policy::copy`` policy when binding the function. You may +also use ``py::return_value_policy::reference_internal`` or a +``py::keep_alive`` to ensure the data stays valid as long as the returned numpy +array does. + +When returning such a reference or map, pybind11 additionally respects the +readonly-status of the returned value, marking the numpy array as non-writeable +if the reference or map was itself read-only. + +.. note:: + + Sparse types are always copied when returned. + +.. _storage_orders: + +Storage orders +============== + +Passing arguments via ``Eigen::Ref`` has some limitations that you must be +aware of in order to effectively pass matrices by reference. First and +foremost is that the default ``Eigen::Ref<MatrixType>`` class requires +contiguous storage along columns (for column-major types, the default in Eigen) +or rows if ``MatrixType`` is specifically an ``Eigen::RowMajor`` storage type.
+The former, Eigen's default, is incompatible with ``numpy``'s default row-major
+storage, and so you will not be able to pass numpy arrays to Eigen by reference
+without making one of two changes.
+
+(Note that this does not apply to vectors (or column or row matrices): for such
+types the "row-major" and "column-major" distinction is meaningless).
+
+The first approach is to change the use of ``Eigen::Ref<MatrixType>`` to the
+more general ``Eigen::Ref<MatrixType, 0, Eigen::Stride<Eigen::Dynamic,
+Eigen::Dynamic>>`` (or similar type with a fully dynamic stride type in the
+third template argument). Since this is a rather cumbersome type, pybind11
+provides a ``py::EigenDRef<MatrixType>`` type alias for your convenience (along
+with ``EigenDMap`` for the equivalent Map, and ``EigenDStride`` for just the stride
+type).
+
+This type allows Eigen to map into any arbitrary storage order. This is not
+the default in Eigen for performance reasons: contiguous storage allows
+vectorization that cannot be done when storage is not known to be contiguous at
+compile time. The default ``Eigen::Ref`` stride type allows non-contiguous
+storage along the outer dimension (that is, the rows of a column-major matrix
+or columns of a row-major matrix), but not along the inner dimension.
+
+This type, however, has the added benefit of also being able to map numpy array
+slices. For example, the following (contrived) example uses Eigen with a numpy
+slice to multiply by 2 all coefficients that are both on even rows (0, 2, 4,
+...) and in columns 2, 5, or 8:
+
+.. code-block:: cpp
+
+    m.def("scale", [](py::EigenDRef<Eigen::MatrixXd> m, double c) { m *= c; });
+
+.. code-block:: python
+
+    # myarray = np.array(...)
+    scale(myarray[0::2, 2:9:3], 2.0)
+
+The second approach to avoid copying is more intrusive: rearranging the
+underlying data types to not run into the non-contiguous storage problem in the
+first place. In particular, that means using matrices with ``Eigen::RowMajor``
+storage, where appropriate, such as:
+
+.. code-block:: cpp
+
+    using RowMatrixXd = Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;
+    // Use RowMatrixXd instead of MatrixXd
+
+Now bound functions accepting ``Eigen::Ref<RowMatrixXd>`` arguments will be
+callable with numpy's (default) arrays without involving a copy.
+
+You can, alternatively, change the storage order that numpy arrays use by
+adding the ``order='F'`` option when creating an array:
+
+.. code-block:: python
+
+    myarray = np.array(source, order="F")
+
+Such an object will be passable to a bound function accepting an
+``Eigen::Ref<MatrixXd>`` (or similar column-major Eigen type).
+
+One major caveat with this approach, however, is that it is not entirely as
+easy as simply flipping all Eigen or numpy usage from one to the other: some
+operations may alter the storage order of a numpy array. For example, ``a2 =
+array.transpose()`` results in ``a2`` being a view of ``array`` that references
+the same data, but in the opposite storage order!
+
+While this approach allows fully optimized vectorized calculations in Eigen, it
+cannot be used with array slices, unlike the first approach.
+
+When *returning* a matrix to Python (either a regular matrix, a reference via
+``Eigen::Ref<>``, or a map/block into a matrix), no special storage
+consideration is required: the created numpy array will have the required
+stride that allows numpy to properly interpret the array, whatever its storage
+order.
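+
+To recap, numpy's default arrays can be passed by reference either through a
+fully-dynamic-stride reference or through a row-major Eigen type. The following
+minimal sketch (an editorial illustration; the function names ``double_any``
+and ``double_rowmajor`` are not part of pybind11) binds the same in-place
+doubling operation both ways:
+
+.. code-block:: cpp
+
+    using RowMatrixXd = Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;
+
+    // Accepts any storage order, and even non-contiguous numpy slices,
+    // at the cost of strides that are unknown at compile time:
+    m.def("double_any", [](py::EigenDRef<Eigen::MatrixXd> m) { m *= 2; });
+
+    // Accepts numpy's default (row-major, contiguous) arrays by reference
+    // and lets Eigen vectorize, but rejects arbitrary slices:
+    m.def("double_rowmajor", [](Eigen::Ref<RowMatrixXd> m) { m *= 2; });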
+
+Failing rather than copying
+===========================
+
+The default behaviour when binding ``Eigen::Ref<const MatrixType>`` Eigen
+references is to copy matrix values when passed a numpy array that does not
+conform to the element type of ``MatrixType`` or does not have a compatible
+stride layout. If you want to explicitly avoid copying in such a case, you
+should bind arguments using the ``py::arg().noconvert()`` annotation (as
+described in the :ref:`nonconverting_arguments` documentation).
+
+The following example shows arguments that don't allow data copying to take
+place:
+
+.. code-block:: cpp
+
+    // The method and function to be bound:
+    class MyClass {
+        // ...
+        double some_method(const Eigen::Ref<const Eigen::MatrixXd> &matrix) { /* ... */ }
+    };
+    float some_function(const Eigen::Ref<const Eigen::MatrixXf> &big,
+                        const Eigen::Ref<const Eigen::MatrixXf> &small) {
+        // ...
+    }
+
+    // The associated binding code:
+    using namespace pybind11::literals; // for "arg"_a
+    py::class_<MyClass>(m, "MyClass")
+        // ... other class definitions
+        .def("some_method", &MyClass::some_method, py::arg().noconvert());
+
+    m.def("some_function", &some_function,
+        "big"_a.noconvert(), // <- Don't allow copying for this arg
+        "small"_a // <- This one can be copied if needed
+    );
+
+With the above binding code, attempting to call the ``some_method(m)``
+method on a ``MyClass`` object, or attempting to call ``some_function(m, m2)``
+will raise a ``RuntimeError`` rather than making a temporary copy of the array.
+It will, however, allow the ``m2`` argument to be copied into a temporary if
+necessary.
+
+Note that explicitly specifying ``.noconvert()`` is not required for *mutable*
+Eigen references (e.g. ``Eigen::Ref<MatrixXd>`` without ``const`` on the
+``MatrixXd``): mutable references will never be called with a temporary copy.
+
+Vectors versus column/row matrices
+==================================
+
+Eigen and numpy have fundamentally different notions of a vector. In Eigen, a
+vector is simply a matrix with the number of columns or rows set to 1 at
+compile time (for a column vector or row vector, respectively). NumPy, in
+contrast, has comparable 2-dimensional 1xN and Nx1 arrays, but *also* has
+1-dimensional arrays of size N.
+
+When passing a 2-dimensional 1xN or Nx1 array to Eigen, the Eigen type must
+have matching dimensions: That is, you cannot pass a 2-dimensional Nx1 numpy
+array to an Eigen value expecting a row vector, or a 1xN numpy array as a
+column vector argument.
+
+On the other hand, pybind11 allows you to pass 1-dimensional arrays of length N
+as Eigen parameters. If the Eigen type can hold a column vector of length N it
+will be passed as such a column vector. If not, but the Eigen type constraints
+will accept a row vector, it will be passed as a row vector. (The column
+vector takes precedence when both are supported, for example, when passing a
+1D numpy array to a ``MatrixXd`` argument). Note that the type need not be
+explicitly a vector: it is permitted to pass a 1D numpy array of size 5 to an
+Eigen ``Matrix<double, 1, 5>``: you would end up with a 1x5 Eigen matrix.
+Passing the same to an ``Eigen::MatrixXd`` would result in a 5x1 Eigen matrix.
+
+When returning an Eigen vector to numpy, the conversion is ambiguous: a row
+vector of length 4 could be returned as either a 1D array of length 4, or as a
+2D array of size 1x4.
When encountering such a situation, pybind11 compromises
+by considering the returned Eigen type: if it is a compile-time vector--that
+is, the type has either the number of rows or columns set to 1 at compile
+time--pybind11 converts to a 1D numpy array when returning the value. For
+instances that are a vector only at run-time (e.g. ``MatrixXd``,
+``Matrix<float, Dynamic, 4>``), pybind11 returns the vector as a 2D array to
+numpy. If this isn't what you want, you can use ``array.reshape(...)`` to get
+a view of the same data in the desired dimensions.
+
+.. seealso::
+
+    The file :file:`tests/test_eigen.cpp` contains a complete example that
+    shows how to pass Eigen sparse and dense data types in more detail. diff --git a/third_party/pybind11/docs/advanced/cast/functional.rst b/third_party/pybind11/docs/advanced/cast/functional.rst new file mode 100644 index 0000000000..d9b4605759 --- /dev/null +++ b/third_party/pybind11/docs/advanced/cast/functional.rst @@ -0,0 +1,109 @@
+Functional
+##########
+
+The following features must be enabled by including :file:`pybind11/functional.h`.
+
+
+Callbacks and passing anonymous functions
+=========================================
+
+The C++11 standard brought lambda functions and the generic polymorphic
+function wrapper ``std::function<>`` to the C++ programming language, which
+enable powerful new ways of working with functions. Lambda functions come in
+two flavors: stateless lambda functions resemble classic function pointers that
+link to an anonymous piece of code, while stateful lambda functions
+additionally depend on captured variables that are stored in an anonymous
+*lambda closure object*.
+
+Here is a simple example of a C++ function that takes an arbitrary function
+(stateful or stateless) with signature ``int -> int`` as an argument and runs
+it with the value 10.
+
+.. code-block:: cpp
+
+    int func_arg(const std::function<int(int)> &f) {
+        return f(10);
+    }
+
+The example below is more involved: it takes a function of signature ``int -> int``
+and returns another function of the same kind. The return value is a stateful
+lambda function, which stores the value ``f`` in the capture object and adds 1 to
+its return value upon execution.
+
+.. code-block:: cpp
+
+    std::function<int(int)> func_ret(const std::function<int(int)> &f) {
+        return [f](int i) {
+            return f(i) + 1;
+        };
+    }
+
+This example demonstrates using python named parameters in C++ callbacks which
+requires using ``py::cpp_function`` as a wrapper. Usage is similar to defining
+methods of classes:
+
+.. code-block:: cpp
+
+    py::cpp_function func_cpp() {
+        return py::cpp_function([](int i) { return i+1; },
+            py::arg("number"));
+    }
+
+After including the extra header file :file:`pybind11/functional.h`, it is almost
+trivial to generate binding code for all of these functions.
+
+.. code-block:: cpp
+
+    #include <pybind11/functional.h>
+
+    PYBIND11_MODULE(example, m) {
+        m.def("func_arg", &func_arg);
+        m.def("func_ret", &func_ret);
+        m.def("func_cpp", &func_cpp);
+    }
+
+The following interactive session shows how to call them from Python.
+
+.. code-block:: pycon
+
+    $ python
+    >>> import example
+    >>> def square(i):
+    ...     return i * i
+    ...
+    >>> example.func_arg(square)
+    100
+    >>> square_plus_1 = example.func_ret(square)
+    >>> square_plus_1(4)
+    17
+    >>> plus_1 = example.func_cpp()
+    >>> plus_1(number=43)
+    44
+
+.. warning::
+
+    Keep in mind that passing a function from C++ to Python (or vice versa)
+    will instantiate a piece of wrapper code that translates function
+    invocations between the two languages.
Naturally, this translation
+    increases the computational cost of each function call somewhat. A
+    problematic situation can arise when a function is copied back and forth
+    between Python and C++ many times in a row, in which case the underlying
+    wrappers will accumulate correspondingly. The resulting long sequence of
+    C++ -> Python -> C++ -> ... roundtrips can significantly decrease
+    performance.
+
+    There is one exception: pybind11 detects cases where a stateless function
+    (i.e. a function pointer or a lambda function without captured variables)
+    is passed as an argument to another C++ function exposed in Python. In this
+    case, there is no overhead. Pybind11 will extract the underlying C++
+    function pointer from the wrapped function to sidestep a potential C++ ->
+    Python -> C++ roundtrip. This is demonstrated in :file:`tests/test_callbacks.cpp`.
+
+.. note::
+
+    This functionality is very useful when generating bindings for callbacks in
+    C++ libraries (e.g. GUI libraries, asynchronous networking libraries, etc.).
+
+    The file :file:`tests/test_callbacks.cpp` contains a complete example
+    that demonstrates how to work with callbacks and anonymous functions in
+    more detail. diff --git a/third_party/pybind11/docs/advanced/cast/index.rst b/third_party/pybind11/docs/advanced/cast/index.rst new file mode 100644 index 0000000000..3ce9ea0286 --- /dev/null +++ b/third_party/pybind11/docs/advanced/cast/index.rst @@ -0,0 +1,43 @@
+.. _type-conversions:
+
+Type conversions
+################
+
+Apart from enabling cross-language function calls, a fundamental problem
+that a binding tool like pybind11 must address is to provide access to
+native Python types in C++ and vice versa. There are three fundamentally
+different ways to do this—which approach is preferable for a particular type
+depends on the situation at hand.
+
+1. Use a native C++ type everywhere. In this case, the type must be wrapped
+   using pybind11-generated bindings so that Python can interact with it.
+
+2. Use a native Python type everywhere. It will need to be wrapped so that
+   C++ functions can interact with it.
+
+3. Use a native C++ type on the C++ side and a native Python type on the
+   Python side. pybind11 refers to this as a *type conversion*.
+
+   Type conversions are the most "natural" option in the sense that native
+   (non-wrapped) types are used everywhere. The main downside is that a copy
+   of the data must be made on every Python ↔ C++ transition: this is
+   needed since the C++ and Python versions of the same type generally won't
+   have the same memory layout.
+
+   pybind11 can perform many kinds of conversions automatically. An overview
+   is provided in the table ":ref:`conversion_table`".
+
+The following subsections discuss the differences between these options in more
+detail. The main focus in this section is on type conversions, which represent
+the last case of the above list.
+
+.. toctree::
+   :maxdepth: 1
+
+   overview
+   strings
+   stl
+   functional
+   chrono
+   eigen
+   custom diff --git a/third_party/pybind11/docs/advanced/cast/overview.rst b/third_party/pybind11/docs/advanced/cast/overview.rst new file mode 100644 index 0000000000..011bd4c7a3 --- /dev/null +++ b/third_party/pybind11/docs/advanced/cast/overview.rst @@ -0,0 +1,170 @@
+Overview
+########
+
+.. rubric:: 1. Native type in C++, wrapper in Python
+
+Exposing a custom C++ type using :class:`py::class_` was covered in detail
+in the :doc:`/classes` section.
There, the underlying data structure is
+always the original C++ class while the :class:`py::class_` wrapper provides
+a Python interface. Internally, when an object like this is sent from C++ to
+Python, pybind11 will just add the outer wrapper layer over the native C++
+object. Getting it back from Python is just a matter of peeling off the
+wrapper.
+
+.. rubric:: 2. Wrapper in C++, native type in Python
+
+This is the exact opposite situation. Now, we have a type which is native to
+Python, like a ``tuple`` or a ``list``. One way to get this data into C++ is
+with the :class:`py::object` family of wrappers. These are explained in more
+detail in the :doc:`/advanced/pycpp/object` section. We'll just give a quick
+example here:
+
+.. code-block:: cpp
+
+    void print_list(py::list my_list) {
+        for (auto item : my_list)
+            std::cout << item << " ";
+    }
+
+.. code-block:: pycon
+
+    >>> print_list([1, 2, 3])
+    1 2 3
+
+The Python ``list`` is not converted in any way -- it's just wrapped in a C++
+:class:`py::list` class. At its core it's still a Python object. Copying a
+:class:`py::list` will do the usual reference-counting like in Python.
+Returning the object to Python will just remove the thin wrapper.
+
+.. rubric:: 3. Converting between native C++ and Python types
+
+In the previous two cases we had a native type in one language and a wrapper in
+the other. Now, we have native types on both sides and we convert between them.
+
+.. code-block:: cpp
+
+    void print_vector(const std::vector<int> &v) {
+        for (auto item : v)
+            std::cout << item << "\n";
+    }
+
+.. code-block:: pycon
+
+    >>> print_vector([1, 2, 3])
+    1 2 3
+
+In this case, pybind11 will construct a new ``std::vector<int>`` and copy each
+element from the Python ``list``. The newly constructed object will be passed
+to ``print_vector``. The same thing happens in the other direction: a new
+``list`` is made to match the value returned from C++.
+
+Lots of these conversions are supported out of the box, as shown in the table
+below. They are very convenient, but keep in mind that these conversions are
+fundamentally based on copying data. This is perfectly fine for small immutable
+types but it may become quite expensive for large data structures. This can be
+avoided by overriding the automatic conversion with a custom wrapper (i.e. the
+above-mentioned approach 1). This requires some manual effort and more details
+are available in the :ref:`opaque` section.
+
+.. _conversion_table:
+
+List of all builtin conversions
+-------------------------------
+
+The following basic data types are supported out of the box (some may require
+an additional extension header to be included). To pass other data structures
+as arguments and return values, refer to the section on binding :ref:`classes`.
+ ++------------------------------------+---------------------------+-----------------------------------+ +| Data type | Description | Header file | ++====================================+===========================+===================================+ +| ``int8_t``, ``uint8_t`` | 8-bit integers | :file:`pybind11/pybind11.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``int16_t``, ``uint16_t`` | 16-bit integers | :file:`pybind11/pybind11.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``int32_t``, ``uint32_t`` | 32-bit integers | :file:`pybind11/pybind11.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``int64_t``, ``uint64_t`` | 64-bit integers | :file:`pybind11/pybind11.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``ssize_t``, ``size_t`` | Platform-dependent size | :file:`pybind11/pybind11.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``float``, ``double`` | Floating point types | :file:`pybind11/pybind11.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``bool`` | Two-state Boolean type | :file:`pybind11/pybind11.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``char`` | Character literal | :file:`pybind11/pybind11.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``char16_t`` | UTF-16 character literal | :file:`pybind11/pybind11.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``char32_t`` | UTF-32 character literal | :file:`pybind11/pybind11.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``wchar_t`` | Wide character literal | :file:`pybind11/pybind11.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``const char *`` | UTF-8 string literal | :file:`pybind11/pybind11.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``const char16_t *`` | UTF-16 string literal | :file:`pybind11/pybind11.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``const char32_t *`` | UTF-32 string literal | :file:`pybind11/pybind11.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``const wchar_t *`` | Wide string literal | :file:`pybind11/pybind11.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``std::string`` | STL dynamic UTF-8 string | :file:`pybind11/pybind11.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``std::u16string`` | STL dynamic UTF-16 string | :file:`pybind11/pybind11.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``std::u32string`` | STL dynamic UTF-32 string | :file:`pybind11/pybind11.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``std::wstring`` | STL dynamic wide 
string | :file:`pybind11/pybind11.h` |
++------------------------------------+---------------------------+-----------------------------------+
+| ``std::string_view``, | STL C++17 string views | :file:`pybind11/pybind11.h` |
+| ``std::u16string_view``, etc. | | |
++------------------------------------+---------------------------+-----------------------------------+
+| ``std::pair<T1, T2>`` | Pair of two custom types | :file:`pybind11/pybind11.h` |
++------------------------------------+---------------------------+-----------------------------------+
+| ``std::tuple<...>`` | Arbitrary tuple of types | :file:`pybind11/pybind11.h` |
++------------------------------------+---------------------------+-----------------------------------+
+| ``std::reference_wrapper<...>`` | Reference type wrapper | :file:`pybind11/pybind11.h` |
++------------------------------------+---------------------------+-----------------------------------+
+| ``std::complex<T>`` | Complex numbers | :file:`pybind11/complex.h` |
++------------------------------------+---------------------------+-----------------------------------+
+| ``std::array<T, Size>`` | STL static array | :file:`pybind11/stl.h` |
++------------------------------------+---------------------------+-----------------------------------+
+| ``std::vector<T>`` | STL dynamic array | :file:`pybind11/stl.h` |
++------------------------------------+---------------------------+-----------------------------------+
+| ``std::deque<T>`` | STL double-ended queue | :file:`pybind11/stl.h` |
++------------------------------------+---------------------------+-----------------------------------+
+| ``std::valarray<T>`` | STL value array | :file:`pybind11/stl.h` |
++------------------------------------+---------------------------+-----------------------------------+
+| ``std::list<T>`` | STL linked list | :file:`pybind11/stl.h` |
++------------------------------------+---------------------------+-----------------------------------+
+| ``std::map<T1, T2>`` | STL ordered map | :file:`pybind11/stl.h` |
++------------------------------------+---------------------------+-----------------------------------+
+| ``std::unordered_map<T1, T2>`` | STL unordered map | :file:`pybind11/stl.h` |
++------------------------------------+---------------------------+-----------------------------------+
+| ``std::set<T>`` | STL ordered set | :file:`pybind11/stl.h` |
++------------------------------------+---------------------------+-----------------------------------+
+| ``std::unordered_set<T>`` | STL unordered set | :file:`pybind11/stl.h` |
++------------------------------------+---------------------------+-----------------------------------+
+| ``std::optional<T>`` | STL optional type (C++17) | :file:`pybind11/stl.h` |
++------------------------------------+---------------------------+-----------------------------------+
+| ``std::experimental::optional<T>`` | STL optional type (exp.)
| :file:`pybind11/stl.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``std::variant<...>`` | Type-safe union (C++17) | :file:`pybind11/stl.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``std::filesystem::path`` | STL path (C++17) [#]_ | :file:`pybind11/stl/filesystem.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``std::function<...>`` | STL polymorphic function | :file:`pybind11/functional.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``std::chrono::duration<...>`` | STL time duration | :file:`pybind11/chrono.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``std::chrono::time_point<...>`` | STL date/time | :file:`pybind11/chrono.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``Eigen::Matrix<...>`` | Eigen: dense matrix | :file:`pybind11/eigen.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``Eigen::Map<...>`` | Eigen: mapped memory | :file:`pybind11/eigen.h` | ++------------------------------------+---------------------------+-----------------------------------+ +| ``Eigen::SparseMatrix<...>`` | Eigen: sparse matrix | :file:`pybind11/eigen.h` | ++------------------------------------+---------------------------+-----------------------------------+ + +.. [#] ``std::filesystem::path`` is converted to ``pathlib.Path`` and + ``os.PathLike`` is converted to ``std::filesystem::path``. diff --git a/third_party/pybind11/docs/advanced/cast/stl.rst b/third_party/pybind11/docs/advanced/cast/stl.rst new file mode 100644 index 0000000000..109763f7aa --- /dev/null +++ b/third_party/pybind11/docs/advanced/cast/stl.rst @@ -0,0 +1,249 @@ +STL containers +############## + +Automatic conversion +==================== + +When including the additional header file :file:`pybind11/stl.h`, conversions +between ``std::vector<>``/``std::deque<>``/``std::list<>``/``std::array<>``/``std::valarray<>``, +``std::set<>``/``std::unordered_set<>``, and +``std::map<>``/``std::unordered_map<>`` and the Python ``list``, ``set`` and +``dict`` data structures are automatically enabled. The types ``std::pair<>`` +and ``std::tuple<>`` are already supported out of the box with just the core +:file:`pybind11/pybind11.h` header. + +The major downside of these implicit conversions is that containers must be +converted (i.e. copied) on every Python->C++ and C++->Python transition, which +can have implications on the program semantics and performance. Please read the +next sections for more details and alternative approaches that avoid this. + +.. note:: + + Arbitrary nesting of any of these types is possible. + +.. seealso:: + + The file :file:`tests/test_stl.cpp` contains a complete + example that demonstrates how to pass STL data types in more detail. + +.. _cpp17_container_casters: + +C++17 library containers +======================== + +The :file:`pybind11/stl.h` header also includes support for ``std::optional<>`` +and ``std::variant<>``. These require a C++17 compiler and standard library. +In C++14 mode, ``std::experimental::optional<>`` is supported if available. + +Various versions of these containers also exist for C++11 (e.g. in Boost). 
+pybind11 provides an easy way to specialize the ``type_caster`` for such
+types:
+
+.. code-block:: cpp
+
+    // `boost::optional` as an example -- can be any `std::optional`-like container
+    namespace pybind11 { namespace detail {
+        template <typename T>
+        struct type_caster<boost::optional<T>> : optional_caster<boost::optional<T>> {};
+    }}
+
+The above should be placed in a header file and included in all translation units
+where automatic conversion is needed. Similarly, a specialization can be provided
+for custom variant types:
+
+.. code-block:: cpp
+
+    // `boost::variant` as an example -- can be any `std::variant`-like container
+    namespace pybind11 { namespace detail {
+        template <typename... Ts>
+        struct type_caster<boost::variant<Ts...>> : variant_caster<boost::variant<Ts...>> {};
+
+        // Specifies the function used to visit the variant -- `apply_visitor` instead of `visit`
+        template <>
+        struct visit_helper<boost::variant> {
+            template <typename... Args>
+            static auto call(Args &&...args) -> decltype(boost::apply_visitor(args...)) {
+                return boost::apply_visitor(args...);
+            }
+        };
+    }} // namespace pybind11::detail
+
+The ``visit_helper`` specialization is not required if your ``name::variant`` provides
+a ``name::visit()`` function. For any other function name, the specialization must be
+included to tell pybind11 how to visit the variant.
+
+.. warning::
+
+    When converting a ``variant`` type, pybind11 follows the same rules as when
+    determining which function overload to call (:ref:`overload_resolution`), and
+    so the same caveats hold. In particular, the order in which the ``variant``'s
+    alternatives are listed is important, since pybind11 will try conversions in
+    this order. This means that, for example, when converting ``variant<int, bool>``,
+    the ``bool`` variant will never be selected, as any Python ``bool`` is already
+    an ``int`` and is convertible to a C++ ``int``. Changing the order of alternatives
+    (and using ``variant<bool, int>``, in this example) provides a solution.
+
+.. note::
+
+    pybind11 only supports the modern implementation of ``boost::variant``
+    which makes use of variadic templates. This requires Boost 1.56 or newer.
+
+.. _opaque:
+
+Making opaque types
+===================
+
+pybind11 heavily relies on a template matching mechanism to convert parameters
+and return values that are constructed from STL data types such as vectors,
+linked lists, hash tables, etc. This even works in a recursive manner, for
+instance to deal with lists of hash maps of pairs of elementary and custom
+types, etc.
+
+However, a fundamental limitation of this approach is that internal conversions
+between Python and C++ types involve a copy operation that prevents
+pass-by-reference semantics. What does this mean?
+
+Suppose we bind the following function
+
+.. code-block:: cpp
+
+    void append_1(std::vector<int> &v) {
+        v.push_back(1);
+    }
+
+and call it from Python, the following happens:
+
+.. code-block:: pycon
+
+    >>> v = [5, 6]
+    >>> append_1(v)
+    >>> print(v)
+    [5, 6]
+
+As you can see, when passing STL data structures by reference, modifications
+are not propagated back to the Python side. A similar situation arises when
+exposing STL data structures using the ``def_readwrite`` or ``def_readonly``
+functions:
+
+.. code-block:: cpp
+
+    /* ... definition ... */
+
+    class MyClass {
+        std::vector<int> contents;
+    };
+
+    /* ... binding code ... */
+
+    py::class_<MyClass>(m, "MyClass")
+        .def(py::init<>())
+        .def_readwrite("contents", &MyClass::contents);
+
+In this case, properties can be read and written in their entirety. However, an
+``append`` operation involving such a list type has no effect:
+
+..
code-block:: pycon
+
+    >>> m = MyClass()
+    >>> m.contents = [5, 6]
+    >>> print(m.contents)
+    [5, 6]
+    >>> m.contents.append(7)
+    >>> print(m.contents)
+    [5, 6]
+
+Finally, the involved copy operations can be costly when dealing with very
+large lists. To deal with all of the above situations, pybind11 provides a
+macro named ``PYBIND11_MAKE_OPAQUE(T)`` that disables the template-based
+conversion machinery of types, thus rendering them *opaque*. The contents of
+opaque objects are never inspected or extracted, hence they *can* be passed by
+reference. For instance, to turn ``std::vector<int>`` into an opaque type, add
+the declaration
+
+.. code-block:: cpp
+
+    PYBIND11_MAKE_OPAQUE(std::vector<int>);
+
+before any binding code (e.g. invocations to ``class_::def()``, etc.). This
+macro must be specified at the top level (and outside of any namespaces), since
+it adds a template instantiation of ``type_caster``. If your binding code consists of
+multiple compilation units, it must be present in every file (typically via a
+common header) preceding any usage of ``std::vector<int>``. Opaque types must
+also have a corresponding ``class_`` declaration to associate them with a name
+in Python, and to define a set of available operations, e.g.:
+
+.. code-block:: cpp
+
+    py::class_<std::vector<int>>(m, "IntVector")
+        .def(py::init<>())
+        .def("clear", &std::vector<int>::clear)
+        .def("pop_back", &std::vector<int>::pop_back)
+        .def("__len__", [](const std::vector<int> &v) { return v.size(); })
+        .def("__iter__", [](std::vector<int> &v) {
+            return py::make_iterator(v.begin(), v.end());
+        }, py::keep_alive<0, 1>()) /* Keep vector alive while iterator is used */
+        // ....
+
+.. seealso::
+
+    The file :file:`tests/test_opaque_types.cpp` contains a complete
+    example that demonstrates how to create and expose opaque types using
+    pybind11 in more detail.
+
+.. _stl_bind:
+
+Binding STL containers
+======================
+
+The ability to expose STL containers as native Python objects is a fairly
+common request, hence pybind11 also provides an optional header file named
+:file:`pybind11/stl_bind.h` that does exactly this. The mapped containers try
+to match the behavior of their native Python counterparts as much as possible.
+
+The following example showcases usage of :file:`pybind11/stl_bind.h`:
+
+.. code-block:: cpp
+
+    // Don't forget this
+    #include <pybind11/stl_bind.h>
+
+    PYBIND11_MAKE_OPAQUE(std::vector<int>);
+    PYBIND11_MAKE_OPAQUE(std::map<std::string, double>);
+
+    // ...
+
+    // later in binding code:
+    py::bind_vector<std::vector<int>>(m, "VectorInt");
+    py::bind_map<std::map<std::string, double>>(m, "MapStringDouble");
+
+When binding STL containers pybind11 considers the types of the container's
+elements to decide whether the container should be confined to the local module
+(via the :ref:`module_local` feature). If the container element types are
+anything other than already-bound custom types bound without
+``py::module_local()``, the container binding will have ``py::module_local()``
+applied. This includes converting types such as numeric types, strings, Eigen
+types; and types that have not yet been bound at the time of the STL container
+binding. This module-local binding is designed to avoid potential conflicts
+between module bindings (for example, from two separate modules each attempting
+to bind ``std::vector<int>`` as a python type).
+
+It is possible to override this behavior to force a definition to be either
+module-local or global.
To do so, you can pass the attributes
+``py::module_local()`` (to make the binding module-local) or
+``py::module_local(false)`` (to make the binding global) into the
+``py::bind_vector`` or ``py::bind_map`` arguments:
+
+.. code-block:: cpp
+
+    py::bind_vector<std::vector<int>>(m, "VectorInt", py::module_local(false));
+
+Note, however, that such a global binding would make it impossible to load this
+module at the same time as any other pybind module that also attempts to bind
+the same container type (``std::vector<int>`` in the above example).
+
+See :ref:`module_local` for more details on module-local bindings.
+
+.. seealso::
+
+    The file :file:`tests/test_stl_binders.cpp` shows how to use the
+    convenience STL container wrappers. diff --git a/third_party/pybind11/docs/advanced/cast/strings.rst b/third_party/pybind11/docs/advanced/cast/strings.rst new file mode 100644 index 0000000000..e246c5219a --- /dev/null +++ b/third_party/pybind11/docs/advanced/cast/strings.rst @@ -0,0 +1,292 @@
+Strings, bytes and Unicode conversions
+######################################
+
+Passing Python strings to C++
+=============================
+
+When a Python ``str`` is passed from Python to a C++ function that accepts
+``std::string`` or ``char *`` as arguments, pybind11 will encode the Python
+string to UTF-8. All Python ``str`` can be encoded in UTF-8, so this operation
+does not fail.
+
+The C++ language is encoding agnostic. It is the responsibility of the
+programmer to track encodings. It's often easiest to simply `use UTF-8
+everywhere <http://utf8everywhere.org/>`_.
+
+.. code-block:: c++
+
+    m.def("utf8_test",
+        [](const std::string &s) {
+            cout << "utf-8 is icing on the cake.\n";
+            cout << s;
+        }
+    );
+    m.def("utf8_charptr",
+        [](const char *s) {
+            cout << "My favorite food is\n";
+            cout << s;
+        }
+    );
+
+.. code-block:: pycon
+
+    >>> utf8_test("🎂")
+    utf-8 is icing on the cake.
+    🎂
+
+    >>> utf8_charptr("🍕")
+    My favorite food is
+    🍕
+
+.. note::
+
+    Some terminal emulators do not support UTF-8 or emoji fonts and may not
+    display the example above correctly.
+
+The results are the same whether the C++ function accepts arguments by value or
+reference, and whether or not ``const`` is used.
+
+Passing bytes to C++
+--------------------
+
+A Python ``bytes`` object will be passed to C++ functions that accept
+``std::string`` or ``char*`` *without* conversion. In order to make a function
+*only* accept ``bytes`` (and not ``str``), declare it as taking a ``py::bytes``
+argument.
+
+
+Returning C++ strings to Python
+===============================
+
+When a C++ function returns a ``std::string`` or ``char*`` to a Python caller,
+**pybind11 will assume that the string is valid UTF-8** and will decode it to a
+native Python ``str``, using the same API as Python uses to perform
+``bytes.decode('utf-8')``. If this implicit conversion fails, pybind11 will
+raise a ``UnicodeDecodeError``.
+
+.. code-block:: c++
+
+    m.def("std_string_return",
+        []() {
+            return std::string("This string needs to be UTF-8 encoded");
+        }
+    );
+
+.. code-block:: pycon
+
+    >>> isinstance(example.std_string_return(), str)
+    True
+
+
+Because UTF-8 is inclusive of pure ASCII, there is never any issue with
+returning a pure ASCII string to Python. If there is any possibility that the
+string is not pure ASCII, it is necessary to ensure the encoding is valid
+UTF-8.
+
+.. warning::
+
+    Implicit conversion assumes that a returned ``char *`` is null-terminated.
+    If there is no null terminator a buffer overrun will occur.
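+
+When the length of the data is known, one way to avoid relying on a null
+terminator is to construct the Python string from an explicit pointer/length
+pair. A minimal sketch (the function name is illustrative only):
+
+.. code-block:: c++
+
+    m.def("known_length",
+        []() {
+            const char buffer[3] = {'h', 'i', '!'}; // no null terminator
+            return py::str(buffer, 3);              // length given explicitly
+        }
+    );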
+
+Explicit conversions
+--------------------
+
+If some C++ code constructs a ``std::string`` that is not a UTF-8 string, one
+can perform an explicit conversion and return a ``py::str`` object. Explicit
+conversion has the same overhead as implicit conversion.
+
+.. code-block:: c++
+
+    // This uses the Python C API to convert Latin-1 to Unicode
+    m.def("str_output",
+        []() {
+            std::string s = "Send your r\xe9sum\xe9 to Alice in HR"; // Latin-1
+            py::str py_s = py::reinterpret_steal<py::str>(
+                PyUnicode_DecodeLatin1(s.data(), s.length(), nullptr));
+            return py_s;
+        }
+    );
+
+.. code-block:: pycon
+
+    >>> str_output()
+    'Send your résumé to Alice in HR'
+
+The `Python C API
+<https://docs.python.org/3/c-api/unicode.html#built-in-codecs>`_ provides
+several built-in codecs.
+
+
+One could also use a third party encoding library such as libiconv to transcode
+to UTF-8.
+
+Return C++ strings without conversion
+-------------------------------------
+
+If the data in a C++ ``std::string`` does not represent text and should be
+returned to Python as ``bytes``, then one can return the data as a
+``py::bytes`` object.
+
+.. code-block:: c++
+
+    m.def("return_bytes",
+        []() {
+            std::string s("\xba\xd0\xba\xd0"); // Not valid UTF-8
+            return py::bytes(s); // Return the data without transcoding
+        }
+    );
+
+.. code-block:: pycon
+
+    >>> example.return_bytes()
+    b'\xba\xd0\xba\xd0'
+
+
+Note the asymmetry: pybind11 will convert ``bytes`` to ``std::string`` without
+encoding, but cannot convert ``std::string`` back to ``bytes`` implicitly.
+
+.. code-block:: c++
+
+    m.def("asymmetry",
+        [](std::string s) { // Accepts str or bytes from Python
+            return s; // Looks harmless, but implicitly converts to str
+        }
+    );
+
+.. code-block:: pycon
+
+    >>> isinstance(example.asymmetry(b"have some bytes"), str)
+    True
+
+    >>> example.asymmetry(b"\xba\xd0\xba\xd0") # invalid utf-8 as bytes
+    UnicodeDecodeError: 'utf-8' codec can't decode byte 0xba in position 0: invalid start byte
+
+
+Wide character strings
+======================
+
+When a Python ``str`` is passed to a C++ function expecting ``std::wstring``,
+``wchar_t*``, ``std::u16string`` or ``std::u32string``, the ``str`` will be
+encoded to UTF-16 or UTF-32 depending on how the C++ compiler implements each
+type, in the platform's native endianness. When strings of these types are
+returned, they are assumed to contain valid UTF-16 or UTF-32, and will be
+decoded to Python ``str``.
+
+.. code-block:: c++
+
+    #define UNICODE
+    #include <windows.h>
+
+    m.def("set_window_text",
+        [](HWND hwnd, std::wstring s) {
+            // Call SetWindowText with null-terminated UTF-16 string
+            ::SetWindowText(hwnd, s.c_str());
+        }
+    );
+    m.def("get_window_text",
+        [](HWND hwnd) {
+            const int buffer_size = ::GetWindowTextLength(hwnd) + 1;
+            auto buffer = std::make_unique< wchar_t[] >(buffer_size);
+
+            ::GetWindowText(hwnd, buffer.get(), buffer_size);
+
+            std::wstring text(buffer.get());
+
+            // wstring will be converted to Python str
+            return text;
+        }
+    );
+
+Strings in multibyte encodings such as Shift-JIS must be transcoded to
+UTF-8/16/32 before being returned to Python.
+
+
+Character literals
+==================
+
+C++ functions that accept character literals as input will receive the first
+character of a Python ``str`` as their input. If the string is longer than one
+Unicode character, trailing characters will be ignored.
+
+When a character literal is returned from C++ (such as a ``char`` or a
+``wchar_t``), it will be converted to a ``str`` that represents the single
+character.
+
+..
code-block:: c++
+
+    m.def("pass_char", [](char c) { return c; });
+    m.def("pass_wchar", [](wchar_t w) { return w; });
+
+.. code-block:: pycon
+
+    >>> example.pass_char("A")
+    'A'
+
+While C++ will cast integers to character types (``char c = 0x65;``), pybind11
+does not convert Python integers to characters implicitly. The Python function
+``chr()`` can be used to convert integers to characters.
+
+.. code-block:: pycon
+
+    >>> example.pass_char(0x65)
+    TypeError
+
+    >>> example.pass_char(chr(0x65))
+    'A'
+
+If the desire is to work with an 8-bit integer, use ``int8_t`` or ``uint8_t``
+as the argument type.
+
+Grapheme clusters
+-----------------
+
+A single grapheme may be represented by two or more Unicode characters. For
+example 'é' is usually represented as U+00E9 but can also be expressed as the
+combining character sequence U+0065 U+0301 (that is, the letter 'e' followed by
+a combining acute accent). The combining character will be lost if the
+two-character sequence is passed as an argument, even though it renders as a
+single grapheme.
+
+.. code-block:: pycon
+
+    >>> example.pass_wchar("é")
+    'é'
+
+    >>> combining_e_acute = "e" + "\u0301"
+
+    >>> combining_e_acute
    'é'
+
+    >>> combining_e_acute == "é"
+    False
+
+    >>> example.pass_wchar(combining_e_acute)
+    'e'
+
+Normalizing combining characters before passing the character literal to C++
+may resolve *some* of these issues:
+
+.. code-block:: pycon
+
+    >>> example.pass_wchar(unicodedata.normalize("NFC", combining_e_acute))
+    'é'
+
+In some languages (Thai for example), there are `graphemes that cannot be
+expressed as a single Unicode code point
+<http://unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries>`_, so there is
+no way to capture them in a C++ character type.
+
+
+C++17 string views
+==================
+
+C++17 string views are automatically supported when compiling in C++17 mode.
+They follow the same rules for encoding and decoding as the corresponding STL
+string type (for example, a ``std::u16string_view`` argument will be passed
+UTF-16-encoded data, and a returned ``std::string_view`` will be decoded as
+UTF-8).
+
+References
+==========
+
+* `The Absolute Minimum Every Software Developer Absolutely, Positively Must Know About Unicode and Character Sets (No Excuses!) <https://www.joelonsoftware.com/2003/10/08/the-absolute-minimum-every-software-developer-absolutely-positively-must-know-about-unicode-and-character-sets-no-excuses/>`_
+* `C++ - Using STL Strings at Win32 API Boundaries <https://msdn.microsoft.com/en-us/magazine/mt238407.aspx>`_ diff --git a/third_party/pybind11/docs/advanced/classes.rst b/third_party/pybind11/docs/advanced/classes.rst new file mode 100644 index 0000000000..49ddf5c0b1 --- /dev/null +++ b/third_party/pybind11/docs/advanced/classes.rst @@ -0,0 +1,1335 @@
+Classes
+#######
+
+This section presents advanced binding code for classes and it is assumed
+that you are already familiar with the basics from :doc:`/classes`.
+
+.. _overriding_virtuals:
+
+Overriding virtual functions in Python
+======================================
+
+Suppose that a C++ class or interface has a virtual function that we'd like
+to override from within Python (we'll focus on the class ``Animal``; ``Dog`` is
+given as a specific example of how one would do this with traditional C++
+code).
+
+.. code-block:: cpp
+
+    class Animal {
+    public:
+        virtual ~Animal() { }
+        virtual std::string go(int n_times) = 0;
+    };
+
+    class Dog : public Animal {
+    public:
+        std::string go(int n_times) override {
+            std::string result;
+            for (int i=0; i<n_times; ++i)
+                result += "woof! ";
+            return result;
+        }
+    };
+
+Let's also suppose that we are given a plain function which calls the
+function ``go()`` on an arbitrary ``Animal`` instance.
+
+.. code-block:: cpp
+
+    std::string call_go(Animal *animal) {
+        return animal->go(3);
+    }
+
+Normally, the binding code for these classes would look as follows:
+
+..
code-block:: cpp
+
+    PYBIND11_MODULE(example, m) {
+        py::class_<Animal>(m, "Animal")
+            .def("go", &Animal::go);
+
+        py::class_<Dog, Animal>(m, "Dog")
+            .def(py::init<>());
+
+        m.def("call_go", &call_go);
+    }
+
+However, these bindings are impossible to extend: ``Animal`` is not
+constructible, and we clearly require some kind of "trampoline" that
+redirects virtual calls back to Python.
+
+Defining a new type of ``Animal`` from within Python is possible but requires a
+helper class that is defined as follows:
+
+.. code-block:: cpp
+
+    class PyAnimal : public Animal {
+    public:
+        /* Inherit the constructors */
+        using Animal::Animal;
+
+        /* Trampoline (need one for each virtual function) */
+        std::string go(int n_times) override {
+            PYBIND11_OVERRIDE_PURE(
+                std::string, /* Return type */
+                Animal,      /* Parent class */
+                go,          /* Name of function in C++ (must match Python name) */
+                n_times      /* Argument(s) */
+            );
+        }
+    };
+
+The macro :c:macro:`PYBIND11_OVERRIDE_PURE` should be used for pure virtual
+functions, and :c:macro:`PYBIND11_OVERRIDE` should be used for functions which have
+a default implementation. There are also two alternate macros
+:c:macro:`PYBIND11_OVERRIDE_PURE_NAME` and :c:macro:`PYBIND11_OVERRIDE_NAME` which
+take a string-valued name argument between the *Parent class* and *Name of the
+function* slots, which defines the name of function in Python. This is required
+when the C++ and Python versions of the
+function have different names, e.g. ``operator()`` vs ``__call__``.
+
+The binding code also needs a few minor adaptations (highlighted):
+
+.. code-block:: cpp
+    :emphasize-lines: 2,3
+
+    PYBIND11_MODULE(example, m) {
+        py::class_<Animal, PyAnimal /* <--- trampoline */>(m, "Animal")
+            .def(py::init<>())
+            .def("go", &Animal::go);
+
+        py::class_<Dog, Animal>(m, "Dog")
+            .def(py::init<>());
+
+        m.def("call_go", &call_go);
+    }
+
+Importantly, pybind11 is made aware of the trampoline helper class by
+specifying it as an extra template argument to :class:`class_`. (This can also
+be combined with other template arguments such as a custom holder type; the
+order of template types does not matter). Following this, we are able to
+define a constructor as usual.
+
+Bindings should be made against the actual class, not the trampoline helper class.
+
+.. code-block:: cpp
+    :emphasize-lines: 3
+
+    py::class_<Animal, PyAnimal>(m, "Animal")
+        .def(py::init<>())
+        .def("go", &PyAnimal::go); /* <--- THIS IS WRONG, use &Animal::go */
+
+Note, however, that the above is sufficient for allowing python classes to
+extend ``Animal``, but not ``Dog``: see :ref:`virtual_and_inheritance` for the
+necessary steps required to provide proper overriding support for inherited
+classes.
+
+The Python session below shows how to override ``Animal::go`` and invoke it via
+a virtual method call.
+
+.. code-block:: pycon
+
+    >>> from example import *
+    >>> d = Dog()
+    >>> call_go(d)
+    'woof! woof! woof! '
+    >>> class Cat(Animal):
+    ...     def go(self, n_times):
+    ...         return "meow! " * n_times
+    ...
+    >>> c = Cat()
+    >>> call_go(c)
+    'meow! meow! meow! '
+
+If you are defining a custom constructor in a derived Python class, you *must*
+ensure that you explicitly call the bound C++ constructor using ``__init__``,
+*regardless* of whether it is a default constructor or not. Otherwise, the
+memory for the C++ portion of the instance will be left uninitialized, which
+will generally leave the C++ instance in an invalid state and cause undefined
+behavior if the C++ instance is subsequently used.
+
+..
versionchanged:: 2.6
+    The default pybind11 metaclass will throw a ``TypeError`` when it detects
+    that ``__init__`` was not called by a derived class.
+
+Here is an example:
+
+.. code-block:: python
+
+    class Dachshund(Dog):
+        def __init__(self, name):
+            Dog.__init__(self)  # Without this, a TypeError is raised.
+            self.name = name
+
+        def bark(self):
+            return "yap!"
+
+Note that a direct ``__init__`` constructor *should be called*, and ``super()``
+should not be used. For simple cases of linear inheritance, ``super()``
+may work, but once you begin mixing Python and C++ multiple inheritance,
+things will fall apart due to differences between Python's MRO and C++'s
+mechanisms.
+
+Please take a look at the :ref:`macro_notes` before using this feature.
+
+.. note::
+
+    When the overridden type returns a reference or pointer to a type that
+    pybind11 converts from Python (for example, numeric values, std::string,
+    and other built-in value-converting types), there are some limitations to
+    be aware of:
+
+    - because in these cases there is no C++ variable to reference (the value
+      is stored in the referenced Python variable), pybind11 provides one in
+      the PYBIND11_OVERRIDE macros (when needed) with static storage duration.
+      Note that this means that invoking the overridden method on *any*
+      instance will change the referenced value stored in *all* instances of
+      that type.
+
+    - Attempts to modify a non-const reference will not have the desired
+      effect: it will change only the static cache variable, but this change
+      will not propagate to the underlying Python instance, and the change will
+      be replaced the next time the override is invoked.
+
+.. warning::
+
+    The :c:macro:`PYBIND11_OVERRIDE` and accompanying macros used to be called
+    ``PYBIND11_OVERLOAD`` up until pybind11 v2.5.0, and :func:`get_override`
+    used to be called ``get_overload``. This naming was corrected and the older
+    macro and function names may soon be deprecated, in order to reduce
+    confusion with overloaded functions and methods and ``py::overload_cast``
+    (see :ref:`classes`).
+
+.. seealso::
+
+    The file :file:`tests/test_virtual_functions.cpp` contains a complete
+    example that demonstrates how to override virtual functions using pybind11
+    in more detail.
+
+.. _virtual_and_inheritance:
+
+Combining virtual functions and inheritance
+===========================================
+
+When combining virtual methods with inheritance, you need to be sure to provide
+an override for each method for which you want to allow overrides from derived
+python classes. For example, suppose we extend the above ``Animal``/``Dog``
+example as follows:
+
+..
code-block:: cpp
+
+    class Animal {
+    public:
+        virtual std::string go(int n_times) = 0;
+        virtual std::string name() { return "unknown"; }
+    };
+    class Dog : public Animal {
+    public:
+        std::string go(int n_times) override {
+            std::string result;
+            for (int i=0; i<n_times; ++i)
+                result += bark() + " ";
+            return result;
+        }
+        virtual std::string bark() { return "woof!"; }
+    };
+
+then a trampoline class is required for both ``Animal`` and ``Dog``; the
+``Dog`` trampoline must override every virtual method, including those
+inherited from ``Animal``:
+
+.. code-block:: cpp
+
+    class PyAnimal : public Animal {
+    public:
+        using Animal::Animal; // Inherit constructors
+        std::string go(int n_times) override { PYBIND11_OVERRIDE_PURE(std::string, Animal, go, n_times); }
+        std::string name() override { PYBIND11_OVERRIDE(std::string, Animal, name, ); }
+    };
+    class PyDog : public Dog {
+    public:
+        using Dog::Dog; // Inherit constructors
+        std::string go(int n_times) override { PYBIND11_OVERRIDE(std::string, Dog, go, n_times); }
+        std::string name() override { PYBIND11_OVERRIDE(std::string, Dog, name, ); }
+        std::string bark() override { PYBIND11_OVERRIDE(std::string, Dog, bark, ); }
+    };
+
+A registered class derived from a pybind11-registered class with virtual
+methods requires a similar trampoline class, *even if* it doesn't explicitly
+declare or override any virtual methods itself:
+
+.. code-block:: cpp
+
+    class Husky : public Dog {};
+    class PyHusky : public Husky {
+    public:
+        using Husky::Husky; // Inherit constructors
+        std::string go(int n_times) override { PYBIND11_OVERRIDE_PURE(std::string, Husky, go, n_times); }
+        std::string name() override { PYBIND11_OVERRIDE(std::string, Husky, name, ); }
+        std::string bark() override { PYBIND11_OVERRIDE(std::string, Husky, bark, ); }
+    };
+
+There is, however, a technique that can be used to avoid this duplication
+(which can be especially helpful for a base class with several virtual
+methods). The technique involves using template trampoline classes:
+
+.. code-block:: cpp
+
+    template <class AnimalBase = Animal> class PyAnimal : public AnimalBase {
+    public:
+        using AnimalBase::AnimalBase; // Inherit constructors
+        std::string go(int n_times) override { PYBIND11_OVERRIDE_PURE(std::string, AnimalBase, go, n_times); }
+        std::string name() override { PYBIND11_OVERRIDE(std::string, AnimalBase, name, ); }
+    };
+    template <class DogBase = Dog> class PyDog : public PyAnimal<DogBase> {
+    public:
+        using PyAnimal<DogBase>::PyAnimal; // Inherit constructors
+        // Override PyAnimal's pure virtual go() with a non-pure one:
+        std::string go(int n_times) override { PYBIND11_OVERRIDE(std::string, DogBase, go, n_times); }
+        std::string bark() override { PYBIND11_OVERRIDE(std::string, DogBase, bark, ); }
+    };
+
+This technique has the advantage of requiring just one trampoline method to be
+declared per virtual method and pure virtual method override. It does,
+however, require the compiler to generate at least as many methods (and
+possibly more, if both pure virtual and overridden pure virtual methods are
+exposed, as above).
+
+The classes are then registered with pybind11 using:
+
+.. code-block:: cpp
+
+    py::class_<Animal, PyAnimal<>> animal(m, "Animal");
+    py::class_<Dog, Animal, PyDog<>> dog(m, "Dog");
+    py::class_<Husky, Dog, PyDog<Husky>> husky(m, "Husky");
+    // ... add animal, dog, husky definitions
+
+Note that ``Husky`` did not require a dedicated trampoline template class at
+all, since it neither declares any new virtual methods nor provides any pure
+virtual method implementations.
+
+With either the repeated-virtuals or templated trampoline methods in place, you
+can now create a python class that inherits from ``Dog``:
+
+.. code-block:: python
+
+    class ShihTzu(Dog):
+        def bark(self):
+            return "yip!"
+
+.. seealso::
+
+    See the file :file:`tests/test_virtual_functions.cpp` for complete examples
+    using both the duplication and templated trampoline approaches.
+
+.. _extended_aliases:
+
+Extended trampoline class functionality
+=======================================
+
+.. _extended_class_functionality_forced_trampoline:
+
+Forced trampoline class initialisation
+--------------------------------------
+The trampoline classes described in the previous sections are, by default, only
+initialized when needed. More specifically, they are initialized when a python
+class actually inherits from a registered type (instead of merely creating an
+instance of the registered type), or when a registered constructor is only
+valid for the trampoline class but not the registered class. This is primarily
+for performance reasons: when the trampoline class is not needed for anything
+except virtual method dispatching, not initializing the trampoline class
+improves performance by avoiding needing to do a run-time check to see if the
+inheriting python instance has an overridden method.
+
+Sometimes, however, it is useful to always initialize a trampoline class as an
+intermediate class that does more than just handle virtual method dispatching.
+For example, such a class might perform extra class initialization, extra
+destruction operations, and might define new members and methods to enable a
+more python-like interface to a class.
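+
+For example, a trampoline that performs extra work on construction might look
+like the following minimal sketch (``Widget`` and ``PyWidget`` are illustrative
+names, not pybind11 types; the ``py::init_alias`` mechanism used here to force
+construction through the trampoline is explained below):
+
+.. code-block:: cpp
+
+    class Widget {
+    public:
+        virtual ~Widget() = default;
+        virtual int size() const { return 0; }
+    };
+
+    class PyWidget : public Widget {
+    public:
+        // Extra, python-facing initialization beyond virtual dispatching:
+        PyWidget() { /* e.g. register the instance in a per-module registry */ }
+
+        int size() const override { PYBIND11_OVERRIDE(int, Widget, size, ); }
+    };
+
+    // Construction always goes through the PyWidget trampoline:
+    py::class_<Widget, PyWidget>(m, "Widget")
+        .def(py::init_alias<>());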
+
+In order to tell pybind11 that it should *always* initialize the trampoline
+class when creating new instances of a type, the class constructors should be
+declared using ``py::init_alias<Args, ...>()`` instead of the usual
+``py::init<Args, ...>()``. This forces construction via the trampoline class,
+ensuring member initialization and (eventual) destruction.
+
+.. seealso::
+
+    See the file :file:`tests/test_virtual_functions.cpp` for complete examples
+    showing both normal and forced trampoline instantiation.
+
+Different method signatures
+---------------------------
+The macros introduced in :ref:`overriding_virtuals` cover most of the standard
+use cases when exposing C++ classes to Python. Sometimes it is hard or unwieldy
+to create a direct one-on-one mapping between the arguments and method return
+type.
+
+An example would be when the C++ signature contains output arguments using
+references (See also :ref:`faq_reference_arguments`). Another way of solving
+this is to use the method body of the trampoline class to do conversions to the
+input and return of the Python method.
+
+The main building block to do so is :func:`get_override`; this function
+allows retrieving a method implemented in Python from within the trampoline's
+methods. Consider for example a C++ method which has the signature
+``bool myMethod(int32_t& value)``, where the return indicates whether
+something should be done with the ``value``. This can be made convenient on the
+Python side by allowing the Python function to return ``None`` or an ``int``:
+
+.. code-block:: cpp
+
+    bool MyClass::myMethod(int32_t& value)
+    {
+        pybind11::gil_scoped_acquire gil; // Acquire the GIL while in this scope.
+        // Try to look up the overridden method on the Python side.
+        pybind11::function override = pybind11::get_override(this, "myMethod");
+        if (override) { // method is found
+            auto obj = override(value); // Call the Python function.
+            if (py::isinstance<py::int_>(obj)) { // check if it returned a Python integer type
+                value = obj.cast<int32_t>(); // Cast it and assign it to the value.
+                return true; // Return true; value should be used.
+            } else {
+                return false; // Python returned none, return false.
+            }
+        }
+        return false; // Alternatively return MyClass::myMethod(value);
+    }
+
+
+.. _custom_constructors:
+
+Custom constructors
+===================
+
+The syntax for binding constructors was previously introduced, but it only
+works when a constructor of the appropriate arguments actually exists on the
+C++ side. To extend this to more general cases, pybind11 makes it possible
+to bind factory functions as constructors. For example, suppose you have a
+class like this:
+
+.. code-block:: cpp
+
+    class Example {
+    private:
+        Example(int); // private constructor
+    public:
+        // Factory function:
+        static Example create(int a) { return Example(a); }
+    };
+
+    py::class_<Example>(m, "Example")
+        .def(py::init(&Example::create));
+
+While it is possible to create a straightforward binding of the static
+``create`` method, it may sometimes be preferable to expose it as a constructor
+on the Python side. This can be accomplished by calling ``.def(py::init(...))``
+with the function reference returning the new instance passed as an argument.
+It is also possible to use this approach to bind a function returning a new
+instance by raw pointer or by the holder (e.g. ``std::unique_ptr``).
+
+The following example shows the different approaches:
+
+..
code-block:: cpp
+
+    class Example {
+    private:
+        Example(int); // private constructor
+    public:
+        // Factory function - returned by value:
+        static Example create(int a) { return Example(a); }
+
+        // These constructors are publicly callable:
+        Example(double);
+        Example(int, int);
+        Example(std::string);
+    };
+
+    py::class_<Example>(m, "Example")
+        // Bind the factory function as a constructor:
+        .def(py::init(&Example::create))
+        // Bind a lambda function returning a pointer wrapped in a holder:
+        .def(py::init([](std::string arg) {
+            return std::unique_ptr<Example>(new Example(arg));
+        }))
+        // Return a raw pointer:
+        .def(py::init([](int a, int b) { return new Example(a, b); }))
+        // You can mix the above with regular C++ constructor bindings as well:
+        .def(py::init<double>())
+        ;
+
+When the constructor is invoked from Python, pybind11 will call the factory
+function and store the resulting C++ instance in the Python instance.
+
+When combining factory function constructors with :ref:`virtual function
+trampolines <overriding_virtuals>` there are two approaches. The first is to
+add a constructor to the alias class that takes a base value by
+rvalue-reference. If such a constructor is available, it will be used to
+construct an alias instance from the value returned by the factory function.
+The second option is to provide two factory functions to ``py::init()``: the
+first will be invoked when no alias class is required (i.e. when the class is
+being used but not inherited from in Python), and the second will be invoked
+when an alias is required.
+
+You can also specify a single factory function that always returns an alias
+instance: this will result in behaviour similar to ``py::init_alias<...>()``,
+as described in the :ref:`extended trampoline class documentation
+<extended_aliases>`.
+
+The following example shows the different factory approaches for a class with
+an alias:
+
+.. code-block:: cpp
+
+    #include <pybind11/pybind11.h>
+    class Example {
+    public:
+        // ...
+        virtual ~Example() = default;
+    };
+    class PyExample : public Example {
+    public:
+        using Example::Example;
+        PyExample(Example &&base) : Example(std::move(base)) {}
+    };
+    py::class_<Example, PyExample>(m, "Example")
+        // Returns an Example pointer. If a PyExample is needed, the Example
+        // instance will be moved via the extra constructor in PyExample, above.
+        .def(py::init([]() { return new Example(); }))
+        // Two callbacks:
+        .def(py::init([]() { return new Example(); } /* no alias needed */,
+                      []() { return new PyExample(); } /* alias needed */))
+        // *Always* returns an alias instance (like py::init_alias<>())
+        .def(py::init([]() { return new PyExample(); }))
+        ;
+
+Brace initialization
+--------------------
+
+``pybind11::init<>`` internally uses C++11 brace initialization to call the
+constructor of the target class. This means that it can be used to bind
+*implicit* constructors as well:
+
+.. code-block:: cpp
+
+    struct Aggregate {
+        int a;
+        std::string b;
+    };
+
+    py::class_<Aggregate>(m, "Aggregate")
+        .def(py::init<int, std::string>());
+
+.. note::
+
+    Note that brace initialization preferentially invokes constructor overloads
+    taking a ``std::initializer_list``. In the rare event that this causes an
+    issue, you can work around it by using ``py::init(...)`` with a lambda
+    function that constructs the new object as desired.
+
+.. _classes_with_non_public_destructors:
+
+Non-public destructors
+======================
+
+If a class has a private or protected destructor (as might e.g. be the case in
+a singleton pattern), a compile error will occur when creating bindings via
+pybind11.
.. _classes_with_non_public_destructors:

Non-public destructors
======================

If a class has a private or protected destructor (as might e.g. be the case in
a singleton pattern), a compile error will occur when creating bindings via
pybind11. The underlying issue is that the ``std::unique_ptr`` holder type that
is responsible for managing the lifetime of instances will reference the
destructor even if no deallocations ever take place. In order to expose classes
with private or protected destructors, it is possible to override the holder
type via a holder type argument to ``class_``. Pybind11 provides a helper class
``py::nodelete`` that disables any destructor invocations. In this case, it is
crucial that instances are deallocated on the C++ side to avoid memory leaks.

.. code-block:: cpp

    /* ... definition ... */

    class MyClass {
    private:
        ~MyClass() { }
    };

    /* ... binding code ... */

    py::class_<MyClass, std::unique_ptr<MyClass, py::nodelete>>(m, "MyClass")
        .def(py::init<>());

.. _destructors_that_call_python:

Destructors that call Python
============================

If a Python function is invoked from a C++ destructor, an exception may be thrown
of type :class:`error_already_set`. If this error is thrown out of a class destructor,
``std::terminate()`` will be called, terminating the process. Class destructors
must catch all exceptions of type :class:`error_already_set` to discard the Python
exception using :func:`error_already_set::discard_as_unraisable`.

Every Python function should be treated as *possibly throwing*. When a Python generator
stops yielding items, Python will throw a ``StopIteration`` exception, which can pass
through C++ destructors if the generator's stack frame holds the last reference to C++
objects.

For more information, see :ref:`the documentation on exceptions <unraisable_exceptions>`.

.. code-block:: cpp

    class MyClass {
    public:
        ~MyClass() {
            try {
                py::print("Even printing is dangerous in a destructor");
                py::exec("raise ValueError('This is an unraisable exception')");
            } catch (py::error_already_set &e) {
                // error_context should be information about where/why the error
                // occurred, e.g. use __func__ to get the name of the current function
                e.discard_as_unraisable(__func__);
            }
        }
    };

.. note::

    pybind11 does not support C++ destructors marked ``noexcept(false)``.

.. versionadded:: 2.6

.. _implicit_conversions:

Implicit conversions
====================

Suppose that instances of two types ``A`` and ``B`` are used in a project, and
that an ``A`` can easily be converted into an instance of type ``B`` (examples of this
could be a fixed and an arbitrary precision number type).

.. code-block:: cpp

    py::class_<A>(m, "A")
        /// ... members ...

    py::class_<B>(m, "B")
        .def(py::init<A>())
        /// ... members ...

    m.def("func",
        [](const B &) { /* .... */ }
    );

To invoke the function ``func`` using a variable ``a`` containing an ``A``
instance, we'd have to write ``func(B(a))`` in Python. On the other hand, C++
will automatically apply an implicit type conversion, which makes it possible
to directly write ``func(a)``.

In this situation (i.e. where ``B`` has a constructor that converts from
``A``), the following statement enables similar implicit conversions on the
Python side:

.. code-block:: cpp

    py::implicitly_convertible<A, B>();

.. note::

    Implicit conversions from ``A`` to ``B`` only work when ``B`` is a custom
    data type that is exposed to Python via pybind11.

    To prevent runaway recursion, implicit conversions are non-reentrant: an
    implicit conversion invoked as part of another implicit conversion of the
    same type (i.e. from ``A`` to ``B``) will fail.
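Putting the pieces together, a complete module might look like the following.
This is a sketch with illustrative types of our own (the members of ``A`` and
``B`` are not from the example above); note that ``py::init<int>()`` works on
the aggregate ``A`` via brace initialization, as discussed earlier.

.. code-block:: cpp

    #include <pybind11/pybind11.h>
    namespace py = pybind11;

    struct A { int value; };
    struct B {
        int value;
        B(const A &a) : value(a.value) {}  // converting constructor
    };

    PYBIND11_MODULE(conversions, m) {
        py::class_<A>(m, "A")
            .def(py::init<int>());

        py::class_<B>(m, "B")
            .def(py::init<const A &>())
            .def_readonly("value", &B::value);

        // func(A(42)) now works from Python: pybind11 converts the A
        // into a temporary B via B's converting constructor.
        m.def("func", [](const B &b) { return b.value; });

        py::implicitly_convertible<A, B>();
    }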
.. _static_properties:

Static properties
=================

The section on :ref:`properties` discussed the creation of instance properties
that are implemented in terms of C++ getters and setters.

Static properties can also be created in a similar way to expose getters and
setters of static class attributes. Note that the implicit ``self`` argument
also exists in this case and is used to pass the Python ``type`` subclass
instance. This parameter will often not be needed by the C++ side, and the
following example illustrates how to instantiate a lambda getter function
that ignores it:

.. code-block:: cpp

    py::class_<Foo>(m, "Foo")
        .def_property_readonly_static("foo", [](py::object /* self */) { return Foo(); });

Operator overloading
====================

Suppose that we're given the following ``Vector2`` class with a vector addition
and scalar multiplication operation, all implemented using overloaded operators
in C++.

.. code-block:: cpp

    class Vector2 {
    public:
        Vector2(float x, float y) : x(x), y(y) { }

        Vector2 operator+(const Vector2 &v) const { return Vector2(x + v.x, y + v.y); }
        Vector2 operator*(float value) const { return Vector2(x * value, y * value); }
        Vector2& operator+=(const Vector2 &v) { x += v.x; y += v.y; return *this; }
        Vector2& operator*=(float v) { x *= v; y *= v; return *this; }
        Vector2 operator-() const { return Vector2(-x, -y); }

        friend Vector2 operator*(float f, const Vector2 &v) {
            return Vector2(f * v.x, f * v.y);
        }

        std::string toString() const {
            return "[" + std::to_string(x) + ", " + std::to_string(y) + "]";
        }
    private:
        float x, y;
    };

The following snippet shows how the above operators can be conveniently exposed
to Python.

.. code-block:: cpp

    #include <pybind11/operators.h>

    PYBIND11_MODULE(example, m) {
        py::class_<Vector2>(m, "Vector2")
            .def(py::init<float, float>())
            .def(py::self + py::self)
            .def(py::self += py::self)
            .def(py::self *= float())
            .def(float() * py::self)
            .def(py::self * float())
            .def(-py::self)
            .def("__repr__", &Vector2::toString);
    }

Note that a line like

.. code-block:: cpp

    .def(py::self * float())

is really just shorthand notation for

.. code-block:: cpp

    .def("__mul__", [](const Vector2 &a, float b) {
        return a * b;
    }, py::is_operator())

This can be useful for exposing additional operators that don't exist on the
C++ side, or to perform other types of customization. The ``py::is_operator``
flag marker is needed to inform pybind11 that this is an operator, which
returns ``NotImplemented`` when invoked with incompatible arguments rather than
throwing a type error.
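For instance, an equality operator that exists only on the Python side might
be added like this. This is a sketch under our assumption that ``Vector2`` has
no C++ ``operator==``; the string-based comparison is purely illustrative,
using the only public accessor the class exposes.

.. code-block:: cpp

    py::class_<Vector2>(m, "Vector2")
        // ... operator bindings from above ...
        // A Python-only operator: Vector2 == Vector2.
        .def("__eq__", [](const Vector2 &a, const Vector2 &b) {
            return a.toString() == b.toString();  // illustrative comparison only
        }, py::is_operator());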
.. note::

    To use the more convenient ``py::self`` notation, the additional
    header file :file:`pybind11/operators.h` must be included.

.. seealso::

    The file :file:`tests/test_operator_overloading.cpp` contains a
    complete example that demonstrates how to work with overloaded operators in
    more detail.

.. _pickling:

Pickling support
================

Python's ``pickle`` module provides a powerful facility to serialize and
de-serialize a Python object graph into a binary data stream. To pickle and
unpickle C++ classes using pybind11, a ``py::pickle()`` definition must be
provided. Suppose the class in question has the following signature:

.. code-block:: cpp

    class Pickleable {
    public:
        Pickleable(const std::string &value) : m_value(value) { }
        const std::string &value() const { return m_value; }

        void setExtra(int extra) { m_extra = extra; }
        int extra() const { return m_extra; }
    private:
        std::string m_value;
        int m_extra = 0;
    };

Pickling support in Python is enabled by defining the ``__setstate__`` and
``__getstate__`` methods [#f3]_. For pybind11 classes, use ``py::pickle()``
to bind these two functions:

.. code-block:: cpp

    py::class_<Pickleable>(m, "Pickleable")
        .def(py::init<std::string>())
        .def("value", &Pickleable::value)
        .def("extra", &Pickleable::extra)
        .def("setExtra", &Pickleable::setExtra)
        .def(py::pickle(
            [](const Pickleable &p) { // __getstate__
                /* Return a tuple that fully encodes the state of the object */
                return py::make_tuple(p.value(), p.extra());
            },
            [](py::tuple t) { // __setstate__
                if (t.size() != 2)
                    throw std::runtime_error("Invalid state!");

                /* Create a new C++ instance */
                Pickleable p(t[0].cast<std::string>());

                /* Assign any additional state */
                p.setExtra(t[1].cast<int>());

                return p;
            }
        ));

The ``__setstate__`` part of the ``py::pickle()`` definition follows the same
rules as the single-argument version of ``py::init()``. The return type can be
a value, pointer or holder type. See :ref:`custom_constructors` for details.

An instance can now be pickled as follows:

.. code-block:: python

    import pickle

    p = Pickleable("test_value")
    p.setExtra(15)
    data = pickle.dumps(p)
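The same round trip can also be exercised from C++, e.g. in a test. This is a
sketch under the assumption that the bindings above are registered and the
interpreter is running:

.. code-block:: cpp

    #include <cassert>

    py::object pickle = py::module_::import("pickle");
    py::object p = py::cast(Pickleable("test_value"));
    p.attr("setExtra")(15);
    py::object data = pickle.attr("dumps")(p, 2);  // protocol >= 2, see note below
    py::object p2 = pickle.attr("loads")(data);
    assert(p2.attr("extra")().cast<int>() == 15);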
.. note::

    If given, the second argument to ``dumps`` must be 2 or larger - 0 and 1 are
    not supported. Newer versions are also fine; for instance, specify ``-1`` to
    always use the latest available version. Beware: failure to follow these
    instructions will cause important pybind11 memory allocation routines to be
    skipped during unpickling, which will likely lead to memory corruption
    and/or segmentation faults. Python defaults to version 3 (Python 3.0-3.7)
    and version 4 for Python 3.8+.

.. seealso::

    The file :file:`tests/test_pickling.cpp` contains a complete example
    that demonstrates how to pickle and unpickle types using pybind11 in more
    detail.

.. [#f3] http://docs.python.org/3/library/pickle.html#pickling-class-instances

Deepcopy support
================

Python normally uses references in assignments. Sometimes a real copy is needed
to prevent changing all copies. The ``copy`` module [#f5]_ provides these
capabilities.

A class with pickle support is automatically also (deep)copy
compatible. However, performance can be improved by adding custom
``__copy__`` and ``__deepcopy__`` methods.

For simple classes (deep)copy can be enabled by using the copy constructor,
which should look as follows:

.. code-block:: cpp

    py::class_<Copyable>(m, "Copyable")
        .def("__copy__", [](const Copyable &self) {
            return Copyable(self);
        })
        .def("__deepcopy__", [](const Copyable &self, py::dict) {
            return Copyable(self);
        }, "memo"_a);

.. note::

    Dynamic attributes will not be copied in this example.

.. [#f5] https://docs.python.org/3/library/copy.html

Multiple Inheritance
====================

pybind11 can create bindings for types that derive from multiple base types
(aka. *multiple inheritance*). To do so, specify all bases in the template
arguments of the ``class_`` declaration:

.. code-block:: cpp

    py::class_<MyType, BaseType1, BaseType2, BaseType3>(m, "MyType")
        ...

The base types can be specified in arbitrary order, and they can even be
interspersed with alias types and holder types (discussed earlier in this
document)---pybind11 will automatically find out which is which. The only
requirement is that the first template argument is the type to be declared.

It is also permitted to inherit multiply from exported C++ classes in Python,
as well as inheriting from multiple Python and/or pybind11-exported classes.

There is one caveat regarding the implementation of this feature:

When only one base type is specified for a C++ type that actually has multiple
bases, pybind11 will assume that it does not participate in multiple
inheritance, which can lead to undefined behavior. In such cases, add the tag
``multiple_inheritance`` to the class constructor:

.. code-block:: cpp

    py::class_<MyType, BaseType2>(m, "MyType", py::multiple_inheritance());

The tag is redundant and does not need to be specified when multiple base types
are listed.

.. _module_local:

Module-local class bindings
===========================

When creating a binding for a class, pybind11 by default makes that binding
"global" across modules. What this means is that a type defined in one module
can be returned from any module resulting in the same Python type. For
example, this allows the following:

.. code-block:: cpp

    // In the module1.cpp binding code for module1:
    py::class_<Pet>(m, "Pet")
        .def(py::init<std::string>())
        .def_readonly("name", &Pet::name);

.. code-block:: cpp

    // In the module2.cpp binding code for module2:
    m.def("create_pet", [](std::string name) { return new Pet(name); });

.. code-block:: pycon

    >>> from module1 import Pet
    >>> from module2 import create_pet
    >>> pet1 = Pet("Kitty")
    >>> pet2 = create_pet("Doggy")
    >>> pet2.name
    'Doggy'

When writing binding code for a library, this is usually desirable: this
allows, for example, splitting up a complex library into multiple Python
modules.

In some cases, however, this can cause conflicts. For example, suppose two
unrelated modules make use of an external C++ library and each provide custom
bindings for one of that library's classes. This will result in an error when
a Python program attempts to import both modules (directly or indirectly)
because of conflicting definitions on the external type:

.. code-block:: cpp

    // dogs.cpp

    // Binding for external library class:
    py::class_<pets::Pet>(m, "Pet")
        .def("name", &pets::Pet::name);

    // Binding for local extension class:
    py::class_<Dog, pets::Pet>(m, "Dog")
        .def(py::init<std::string>());

.. code-block:: cpp

    // cats.cpp, in a completely separate project from the above dogs.cpp.

    // Binding for external library class:
    py::class_<pets::Pet>(m, "Pet")
        .def("get_name", &pets::Pet::name);

    // Binding for local extending class:
    py::class_<Cat, pets::Pet>(m, "Cat")
        .def(py::init<std::string>());

.. code-block:: pycon

    >>> import cats
    >>> import dogs
    Traceback (most recent call last):
      File "<stdin>", line 1, in <module>
    ImportError: generic_type: type "Pet" is already registered!

To get around this, you can tell pybind11 to keep the external class binding
localized to the module by passing the ``py::module_local()`` attribute into
the ``py::class_`` constructor:

.. code-block:: cpp

    // Pet binding in dogs.cpp:
    py::class_<pets::Pet>(m, "Pet", py::module_local())
        .def("name", &pets::Pet::name);

.. code-block:: cpp

    // Pet binding in cats.cpp:
    py::class_<pets::Pet>(m, "Pet", py::module_local())
        .def("get_name", &pets::Pet::name);

This makes the Python-side ``dogs.Pet`` and ``cats.Pet`` into distinct classes,
avoiding the conflict and allowing both modules to be loaded. C++ code in the
``dogs`` module that casts or returns a ``Pet`` instance will result in a
``dogs.Pet`` Python instance, while C++ code in the ``cats`` module will result
in a ``cats.Pet`` Python instance.

This does come with two caveats, however: First, external modules cannot return
or cast a ``Pet`` instance to Python (unless they also provide their own local
bindings). Second, from the Python point of view they are two distinct classes.

Note that the locality only applies in the C++ -> Python direction. When
passing such a ``py::module_local`` type into a C++ function, the module-local
classes are still considered. This means that if the following function is
added to any module (including but not limited to the ``cats`` and ``dogs``
modules above) it will be callable with either a ``dogs.Pet`` or ``cats.Pet``
argument:

.. code-block:: cpp

    m.def("pet_name", [](const pets::Pet &pet) { return pet.name(); });

For example, suppose the above function is added to each of ``cats.cpp``,
``dogs.cpp`` and ``frogs.cpp`` (where ``frogs.cpp`` is some other module that
does *not* bind ``Pets`` at all).

.. code-block:: pycon

    >>> import cats, dogs, frogs  # No error because of the added py::module_local()
    >>> mycat, mydog = cats.Cat("Fluffy"), dogs.Dog("Rover")
    >>> (cats.pet_name(mycat), dogs.pet_name(mydog))
    ('Fluffy', 'Rover')
    >>> (cats.pet_name(mydog), dogs.pet_name(mycat), frogs.pet_name(mycat))
    ('Rover', 'Fluffy', 'Fluffy')

It is possible to use ``py::module_local()`` registrations in one module even
if another module registers the same type globally: within the module with the
module-local definition, all C++ instances will be cast to the associated bound
Python type. In other modules any such values are converted to the global
Python type created elsewhere.

.. note::

    STL bindings (as provided via the optional :file:`pybind11/stl_bind.h`
    header) apply ``py::module_local`` by default when the bound type might
    conflict with other modules; see :ref:`stl_bind` for details.

.. note::

    The localization of the bound types is actually tied to the shared object
    or binary generated by the compiler/linker. For typical modules created
    with ``PYBIND11_MODULE()``, this distinction is not significant. It is
    possible, however, when :ref:`embedding` to embed multiple modules in the
    same binary (see :ref:`embedding_modules`). In such a case, the
    localization will apply across all embedded modules within the same binary.

.. seealso::

    The file :file:`tests/test_local_bindings.cpp` contains additional examples
    that demonstrate how ``py::module_local()`` works.

Binding protected member functions
==================================

It's normally not possible to expose ``protected`` member functions to Python:

.. code-block:: cpp

    class A {
    protected:
        int foo() const { return 42; }
    };

    py::class_<A>(m, "A")
        .def("foo", &A::foo); // error: 'foo' is a protected member of 'A'

On one hand, this is good because non-``public`` members aren't meant to be
accessed from the outside. But we may want to make use of ``protected``
functions in derived Python classes.

The following pattern makes this possible:

.. code-block:: cpp

    class A {
    protected:
        int foo() const { return 42; }
    };

    class Publicist : public A { // helper type for exposing protected functions
    public:
        using A::foo; // inherited with different access modifier
    };

    py::class_<A>(m, "A") // bind the primary class
        .def("foo", &Publicist::foo); // expose protected methods via the publicist

This works because ``&Publicist::foo`` is exactly the same function as
``&A::foo`` (same signature and address), just with a different access
modifier. The only purpose of the ``Publicist`` helper class is to make
the function name ``public``.

If the intent is to expose ``protected`` ``virtual`` functions which can be
overridden in Python, the publicist pattern can be combined with the previously
described trampoline:

.. code-block:: cpp

    class A {
    public:
        virtual ~A() = default;

    protected:
        virtual int foo() const { return 42; }
    };

    class Trampoline : public A {
    public:
        int foo() const override { PYBIND11_OVERRIDE(int, A, foo, ); }
    };

    class Publicist : public A {
    public:
        using A::foo;
    };

    py::class_<A, Trampoline>(m, "A") // <-- `Trampoline` here
        .def("foo", &Publicist::foo); // <-- `Publicist` here, not `Trampoline`!

Binding final classes
=====================

Some classes may not be appropriate to inherit from. In C++11, classes can
use the ``final`` specifier to ensure that a class cannot be inherited from.
The ``py::is_final`` attribute can be used to ensure that Python classes
cannot inherit from a specified type. The underlying C++ type does not need
to be declared final.

.. code-block:: cpp

    class IsFinal final {};

    py::class_<IsFinal>(m, "IsFinal", py::is_final());

When you try to inherit from such a class in Python, you will now get this
error:

.. code-block:: pycon

    >>> class PyFinalChild(IsFinal):
    ...     pass
    ...
    TypeError: type 'IsFinal' is not an acceptable base type

.. note:: This attribute is currently ignored on PyPy

.. versionadded:: 2.6

Binding classes with template parameters
========================================

pybind11 can also wrap classes that have template parameters. Consider these classes:

.. code-block:: cpp

    struct Cat {};
    struct Dog {};

    template <typename PetType>
    struct Cage {
        Cage(PetType& pet);
        PetType& get();
    };

C++ templates may only be instantiated at compile time, so pybind11 can only
wrap instantiated templated classes. You cannot wrap a non-instantiated template:

.. code-block:: cpp

    // BROKEN (this will not compile)
    py::class_<Cage>(m, "Cage")
        .def("get", &Cage::get);

You must explicitly specify each template/type combination that you want to
wrap separately.

.. code-block:: cpp

    // ok
    py::class_<Cage<Cat>>(m, "CatCage")
        .def("get", &Cage<Cat>::get);

    // ok
    py::class_<Cage<Dog>>(m, "DogCage")
        .def("get", &Cage<Dog>::get);

If your class methods have template parameters you can wrap those as well,
but once again each instantiation must be explicitly specified:

.. code-block:: cpp

    template <typename T>
    struct MyClass {
        template <typename V>
        T fn(V v);
    };

    py::class_<MyClass<int>>(m, "MyClassT")
        .def("fn", &MyClass<int>::fn<std::string>);
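Binding many instantiations by hand gets repetitive; a small helper template
can stamp them out instead. This is a sketch of our own (the helper name
``declare_cage`` is not from the original text):

.. code-block:: cpp

    #include <string>

    template <typename PetType>
    void declare_cage(py::module_ &m, const std::string &name) {
        py::class_<Cage<PetType>>(m, name.c_str())
            .def("get", &Cage<PetType>::get);
    }

    // Usage inside the PYBIND11_MODULE body:
    declare_cage<Cat>(m, "CatCage");
    declare_cage<Dog>(m, "DogCage");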
Custom automatic downcasters
============================

As explained in :ref:`inheritance`, pybind11 comes with built-in
understanding of the dynamic type of polymorphic objects in C++; that
is, returning a Pet to Python produces a Python object that knows it's
wrapping a Dog, if Pet has virtual methods and pybind11 knows about
Dog and this Pet is in fact a Dog. Sometimes, you might want to
provide this automatic downcasting behavior when creating bindings for
a class hierarchy that does not use standard C++ polymorphism, such as
LLVM [#f4]_. As long as there's some way to determine at runtime
whether a downcast is safe, you can proceed by specializing the
``pybind11::polymorphic_type_hook`` template:

.. code-block:: cpp

    enum class PetKind { Cat, Dog, Zebra };
    struct Pet {   // Not polymorphic: has no virtual methods
        const PetKind kind;
        int age = 0;
    protected:
        Pet(PetKind _kind) : kind(_kind) {}
    };
    struct Dog : Pet {
        Dog() : Pet(PetKind::Dog) {}
        std::string sound = "woof!";
        std::string bark() const { return sound; }
    };

    namespace pybind11 {
        template<> struct polymorphic_type_hook<Pet> {
            static const void *get(const Pet *src, const std::type_info*& type) {
                // note that src may be nullptr
                if (src && src->kind == PetKind::Dog) {
                    type = &typeid(Dog);
                    return static_cast<const Dog*>(src);
                }
                return src;
            }
        };
    } // namespace pybind11

When pybind11 wants to convert a C++ pointer of type ``Base*`` to a
Python object, it calls ``polymorphic_type_hook<Base>::get()`` to
determine if a downcast is possible. The ``get()`` function should use
whatever runtime information is available to determine if its ``src``
parameter is in fact an instance of some class ``Derived`` that
inherits from ``Base``. If it finds such a ``Derived``, it sets
``type = &typeid(Derived)`` and returns a pointer to the ``Derived`` object
that contains ``src``. Otherwise, it just returns ``src``, leaving
``type`` at its default value of nullptr. If you set ``type`` to a
type that pybind11 doesn't know about, no downcasting will occur, and
the original ``src`` pointer will be used with its static type
``Base*``.

It is critical that the returned pointer and ``type`` argument of
``get()`` agree with each other: if ``type`` is set to something
non-null, the returned pointer must point to the start of an object
whose type is ``type``. If the hierarchy being exposed uses only
single inheritance, a simple ``return src;`` will achieve this just
fine, but in the general case, you must cast ``src`` to the
appropriate derived-class pointer (e.g. using
``static_cast<Derived*>(src)``) before allowing it to be returned as a
``void*``.

.. [#f4] https://llvm.org/docs/HowToSetUpLLVMStyleRTTI.html

.. note::

    pybind11's standard support for downcasting objects whose types
    have virtual methods is implemented using
    ``polymorphic_type_hook`` too, using the standard C++ ability to
    determine the most-derived type of a polymorphic object using
    ``typeid()`` and to cast a base pointer to that most-derived type
    (even if you don't know what it is) using ``dynamic_cast``.

.. seealso::

    The file :file:`tests/test_tagbased_polymorphic.cpp` contains a
    more complete example, including a demonstration of how to provide
    automatic downcasting for an entire class hierarchy without
    writing one get() function for each class.

Accessing the type object
=========================

You can get the type object from a C++ class that has already been registered using:

.. code-block:: cpp

    py::type T_py = py::type::of<SomeType>();

You can directly use ``py::type::of(ob)`` to get the type object from any python
object, just like ``type(ob)`` in Python.
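A small usage sketch (the function name is ours; ``SomeType`` stands for any
bound class): the type object can be used with ``py::isinstance``, just like
calling ``isinstance(obj, SomeType)`` in Python.

.. code-block:: cpp

    bool is_some_type(const py::handle &obj) {
        py::type t = py::type::of<SomeType>();  // type object of the bound class
        return py::isinstance(obj, t);          // isinstance(obj, SomeType)
    }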
.. note::

    Other types, like ``py::type::of<int>()``, do not work, see :ref:`type-conversions`.

.. versionadded:: 2.6

Custom type setup
=================

For advanced use cases, such as enabling garbage collection support, you may
wish to directly manipulate the ``PyHeapTypeObject`` corresponding to a
``py::class_`` definition.

You can do that using ``py::custom_type_setup``:

.. code-block:: cpp

    struct OwnsPythonObjects {
        py::object value = py::none();
    };
    py::class_<OwnsPythonObjects> cls(
        m, "OwnsPythonObjects", py::custom_type_setup([](PyHeapTypeObject *heap_type) {
            auto *type = &heap_type->ht_type;
            type->tp_flags |= Py_TPFLAGS_HAVE_GC;
            type->tp_traverse = [](PyObject *self_base, visitproc visit, void *arg) {
                auto &self = py::cast<OwnsPythonObjects&>(py::handle(self_base));
                Py_VISIT(self.value.ptr());
                return 0;
            };
            type->tp_clear = [](PyObject *self_base) {
                auto &self = py::cast<OwnsPythonObjects&>(py::handle(self_base));
                self.value = py::none();
                return 0;
            };
        }));
    cls.def(py::init<>());
    cls.def_readwrite("value", &OwnsPythonObjects::value);

.. versionadded:: 2.8

diff --git a/third_party/pybind11/docs/advanced/embedding.rst b/third_party/pybind11/docs/advanced/embedding.rst
new file mode 100644
index 0000000000..dd980d483a
--- /dev/null
+++ b/third_party/pybind11/docs/advanced/embedding.rst
@@ -0,0 +1,262 @@
.. _embedding:

Embedding the interpreter
#########################

While pybind11 is mainly focused on extending Python using C++, it's also
possible to do the reverse: embed the Python interpreter into a C++ program.
All of the other documentation pages still apply here, so refer to them for
general pybind11 usage. This section will cover a few extra things required
for embedding.

Getting started
===============

A basic executable with an embedded interpreter can be created with just a few
lines of CMake and the ``pybind11::embed`` target, as shown below. For more
information, see :doc:`/compiling`.

.. code-block:: cmake

    cmake_minimum_required(VERSION 3.4)
    project(example)

    find_package(pybind11 REQUIRED)  # or `add_subdirectory(pybind11)`

    add_executable(example main.cpp)
    target_link_libraries(example PRIVATE pybind11::embed)

The essential structure of the ``main.cpp`` file looks like this:

.. code-block:: cpp

    #include <pybind11/embed.h> // everything needed for embedding
    namespace py = pybind11;

    int main() {
        py::scoped_interpreter guard{}; // start the interpreter and keep it alive

        py::print("Hello, World!"); // use the Python API
    }

The interpreter must be initialized before using any Python API, which includes
all the functions and classes in pybind11. The RAII guard class ``scoped_interpreter``
takes care of the interpreter lifetime. After the guard is destroyed, the interpreter
shuts down and clears its memory. No Python functions can be called after this.

Executing Python code
=====================

There are a few different ways to run Python code. One option is to use ``eval``,
``exec`` or ``eval_file``, as explained in :ref:`eval`. Here is a quick example in
the context of an executable with an embedded interpreter:

.. code-block:: cpp

    #include <pybind11/embed.h>
    namespace py = pybind11;

    int main() {
        py::scoped_interpreter guard{};

        py::exec(R"(
            kwargs = dict(name="World", number=42)
            message = "Hello, {name}! The answer is {number}".format(**kwargs)
            print(message)
        )");
    }

Alternatively, similar results can be achieved using pybind11's API (see
:doc:`/advanced/pycpp/index` for more details).

.. code-block:: cpp

    #include <pybind11/embed.h>
    namespace py = pybind11;
    using namespace py::literals;

    int main() {
        py::scoped_interpreter guard{};

        auto kwargs = py::dict("name"_a="World", "number"_a=42);
        auto message = "Hello, {name}! The answer is {number}"_s.format(**kwargs);
        py::print(message);
    }

The two approaches can also be combined:

.. code-block:: cpp

    #include <pybind11/embed.h>
    #include <iostream>

    namespace py = pybind11;
    using namespace py::literals;

    int main() {
        py::scoped_interpreter guard{};

        auto locals = py::dict("name"_a="World", "number"_a=42);
        py::exec(R"(
            message = "Hello, {name}! The answer is {number}".format(**locals())
        )", py::globals(), locals);

        auto message = locals["message"].cast<std::string>();
        std::cout << message;
    }

Importing modules
=================

Python modules can be imported using ``module_::import()``:

.. code-block:: cpp

    py::module_ sys = py::module_::import("sys");
    py::print(sys.attr("path"));

For convenience, the current working directory is included in ``sys.path`` when
embedding the interpreter. This makes it easy to import local Python files:

.. code-block:: python

    """calc.py located in the working directory"""


    def add(i, j):
        return i + j


.. code-block:: cpp

    py::module_ calc = py::module_::import("calc");
    py::object result = calc.attr("add")(1, 2);
    int n = result.cast<int>();
    assert(n == 3);

Modules can be reloaded using ``module_::reload()`` if the source is modified, e.g.
by an external process. This can be useful in scenarios where the application
imports a user defined data processing script which needs to be updated after
changes by the user. Note that this function does not reload modules recursively.
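A brief sketch of the reload workflow, continuing the ``calc.py`` example above
(the edit step is hypothetical):

.. code-block:: cpp

    py::module_ calc = py::module_::import("calc");
    // ... calc.py is edited on disk by the user ...
    calc.reload();
    py::object result = calc.attr("add")(1, 2);  // now uses the new definition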
.. _embedding_modules:

Adding embedded modules
=======================

Embedded binary modules can be added using the ``PYBIND11_EMBEDDED_MODULE`` macro.
Note that the definition must be placed at global scope. They can be imported
like any other module.

.. code-block:: cpp

    #include <pybind11/embed.h>
    namespace py = pybind11;

    PYBIND11_EMBEDDED_MODULE(fast_calc, m) {
        // `m` is a `py::module_` which is used to bind functions and classes
        m.def("add", [](int i, int j) {
            return i + j;
        });
    }

    int main() {
        py::scoped_interpreter guard{};

        auto fast_calc = py::module_::import("fast_calc");
        auto result = fast_calc.attr("add")(1, 2).cast<int>();
        assert(result == 3);
    }

Unlike extension modules where only a single binary module can be created, on
the embedded side an unlimited number of modules can be added using multiple
``PYBIND11_EMBEDDED_MODULE`` definitions (as long as they have unique names).

These modules are added to Python's list of builtins, so they can also be
imported in pure Python files loaded by the interpreter. Everything interacts
naturally:

.. code-block:: python

    """py_module.py located in the working directory"""
    import cpp_module

    a = cpp_module.a
    b = a + 1


.. code-block:: cpp

    #include <pybind11/embed.h>
    namespace py = pybind11;
    using namespace py::literals;

    PYBIND11_EMBEDDED_MODULE(cpp_module, m) {
        m.attr("a") = 1;
    }

    int main() {
        py::scoped_interpreter guard{};

        auto py_module = py::module_::import("py_module");

        auto locals = py::dict("fmt"_a="{} + {} = {}", **py_module.attr("__dict__"));
        assert(locals["a"].cast<int>() == 1);
        assert(locals["b"].cast<int>() == 2);

        py::exec(R"(
            c = a + b
            message = fmt.format(a, b, c)
        )", py::globals(), locals);

        assert(locals["c"].cast<int>() == 3);
        assert(locals["message"].cast<std::string>() == "1 + 2 = 3");
    }


Interpreter lifetime
====================

The Python interpreter shuts down when ``scoped_interpreter`` is destroyed. After
this, creating a new instance will restart the interpreter. Alternatively, the
``initialize_interpreter`` / ``finalize_interpreter`` pair of functions can be used
to directly set the state at any time.

Modules created with pybind11 can be safely re-initialized after the interpreter
has been restarted. However, this may not apply to third-party extension modules.
The issue is that Python itself cannot completely unload extension modules and
there are several caveats with regard to interpreter restarting. In short, not
all memory may be freed, either due to Python reference cycles or user-created
global data. All the details can be found in the CPython documentation.

.. warning::

    Creating two concurrent ``scoped_interpreter`` guards is a fatal error. So is
    calling ``initialize_interpreter`` for a second time after the interpreter
    has already been initialized.

    Do not use the raw CPython API functions ``Py_Initialize`` and
    ``Py_Finalize`` as these do not properly handle the lifetime of
    pybind11's internal data.


Sub-interpreter support
=======================

Creating multiple copies of ``scoped_interpreter`` is not possible because it
represents the main Python interpreter. Sub-interpreters are something different
and they do permit the existence of multiple interpreters. This is an advanced
feature of the CPython API and should be handled with care. pybind11 does not
currently offer a C++ interface for sub-interpreters, so refer to the CPython
documentation for all the details regarding this feature.

We'll just mention a couple of caveats of the sub-interpreter support in pybind11:

 1. Sub-interpreters will not receive independent copies of embedded modules.
    Instead, these are shared and modifications in one interpreter may be
    reflected in another.

 2. Managing multiple threads, multiple interpreters and the GIL can be
    challenging and there are several caveats here, even within the pure
    CPython API (please refer to the Python docs for details). As for
    pybind11, keep in mind that ``gil_scoped_release`` and ``gil_scoped_acquire``
    do not take sub-interpreters into account.

diff --git a/third_party/pybind11/docs/advanced/exceptions.rst b/third_party/pybind11/docs/advanced/exceptions.rst
new file mode 100644
index 0000000000..2211caf5d3
--- /dev/null
+++ b/third_party/pybind11/docs/advanced/exceptions.rst
@@ -0,0 +1,398 @@
Exceptions
##########

Built-in C++ to Python exception translation
============================================

When Python calls C++ code through pybind11, pybind11 provides a C++ exception handler
that will trap C++ exceptions, translate them to the corresponding Python exception,
and raise them so that Python code can handle them.
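As a quick illustration (a sketch; the function and module names are ours), a
C++ exception thrown inside a bound function surfaces in Python as the matching
exception type from the table below:

.. code-block:: cpp

    #include <stdexcept>

    // Inside some PYBIND11_MODULE(demo, m):
    m.def("fetch", [](int index) {
        if (index < 0 || index > 2)
            throw std::out_of_range("index out of range");  // becomes IndexError
        return index * 10;
    });

Calling ``demo.fetch(5)`` from Python then raises an ``IndexError`` that an
ordinary ``try``/``except`` block can catch.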
pybind11 defines translations for ``std::exception`` and its standard
subclasses, and several special exception classes that translate to specific
Python exceptions. Note that these are not actually Python exceptions, so they
cannot be examined using the Python C API. Instead, they are pure C++ objects
that pybind11 will translate into the corresponding Python exception when they
arrive at its exception handler.

.. tabularcolumns:: |p{0.5\textwidth}|p{0.45\textwidth}|

+--------------------------------------+--------------------------------------+
| Exception thrown by C++              | Translated to Python exception type  |
+======================================+======================================+
| :class:`std::exception`              | ``RuntimeError``                     |
+--------------------------------------+--------------------------------------+
| :class:`std::bad_alloc`              | ``MemoryError``                      |
+--------------------------------------+--------------------------------------+
| :class:`std::domain_error`           | ``ValueError``                       |
+--------------------------------------+--------------------------------------+
| :class:`std::invalid_argument`       | ``ValueError``                       |
+--------------------------------------+--------------------------------------+
| :class:`std::length_error`           | ``ValueError``                       |
+--------------------------------------+--------------------------------------+
| :class:`std::out_of_range`           | ``IndexError``                       |
+--------------------------------------+--------------------------------------+
| :class:`std::range_error`            | ``ValueError``                       |
+--------------------------------------+--------------------------------------+
| :class:`std::overflow_error`         | ``OverflowError``                    |
+--------------------------------------+--------------------------------------+
| :class:`pybind11::stop_iteration`    | ``StopIteration`` (used to implement |
|                                      | custom iterators)                    |
+--------------------------------------+--------------------------------------+
| :class:`pybind11::index_error`       | ``IndexError`` (used to indicate out |
|                                      | of bounds access in ``__getitem__``, |
|                                      | ``__setitem__``, etc.)               |
+--------------------------------------+--------------------------------------+
| :class:`pybind11::key_error`         | ``KeyError`` (used to indicate out   |
|                                      | of bounds access in ``__getitem__``, |
|                                      | ``__setitem__`` in dict-like         |
|                                      | objects, etc.)                       |
+--------------------------------------+--------------------------------------+
| :class:`pybind11::value_error`       | ``ValueError`` (used to indicate     |
|                                      | wrong value passed in                |
|                                      | ``container.remove(...)``)           |
+--------------------------------------+--------------------------------------+
| :class:`pybind11::type_error`        | ``TypeError``                        |
+--------------------------------------+--------------------------------------+
| :class:`pybind11::buffer_error`      | ``BufferError``                      |
+--------------------------------------+--------------------------------------+
| :class:`pybind11::import_error`      | ``ImportError``                      |
+--------------------------------------+--------------------------------------+
| :class:`pybind11::attribute_error`   | ``AttributeError``                   |
+--------------------------------------+--------------------------------------+
| Any other exception                  | ``RuntimeError``                     |
+--------------------------------------+--------------------------------------+

Exception translation is not bidirectional. That is, *catching* the C++
exceptions defined above will not trap exceptions that originate from
Python. For that, catch :class:`pybind11::error_already_set`. See :ref:`below
<handling_python_exceptions_cpp>` for further details.

There is also a special exception :class:`cast_error` that is thrown by
:func:`handle::call` when the input arguments cannot be converted to Python
objects.

Registering custom translators
==============================

If the default exception conversion policy described above is insufficient,
pybind11 also provides support for registering custom exception translators.
Similar to pybind11 classes, exception translators can be local to the module
they are defined in or global to the entire python session. To register a simple
exception conversion that translates a C++ exception into a new Python exception
using the C++ exception's ``what()`` method, a helper function is available:

.. code-block:: cpp

    py::register_exception<CppExp>(module, "PyExp");

This call creates a Python exception class with the name ``PyExp`` in the given
module and automatically converts any encountered exceptions of type ``CppExp``
into Python exceptions of type ``PyExp``.

A matching function is available for registering a local exception translator:

.. code-block:: cpp

    py::register_local_exception<CppExp>(module, "PyExp");


It is possible to specify a base class for the exception using the third
parameter, a ``handle``:

.. code-block:: cpp

    py::register_exception<CppExp>(module, "PyExp", PyExc_RuntimeError);
    py::register_local_exception<CppExp>(module, "PyExp", PyExc_RuntimeError);

Then ``PyExp`` can be caught both as ``PyExp`` and ``RuntimeError``.

The class objects of the built-in Python exceptions are listed in the Python
documentation on `Standard Exceptions <https://docs.python.org/3/library/exceptions.html>`_.
The default base class is ``PyExc_Exception``.

When more advanced exception translation is needed, the functions
``py::register_exception_translator(translator)`` and
``py::register_local_exception_translator(translator)`` can be used to register
functions that can translate arbitrary exception types (and which may include
additional logic to do so). The functions take a stateless callable (e.g. a
function pointer or a lambda function without captured variables) with the call
signature ``void(std::exception_ptr)``.

When a C++ exception is thrown, the registered exception translators are tried
in reverse order of registration (i.e. the last registered translator gets the
first shot at handling the exception). All local translators will be tried
before a global translator is tried.

Inside the translator, ``std::rethrow_exception`` should be used within
a try block to re-throw the exception. One or more catch clauses to catch
the appropriate exceptions should then be used with each clause using
``PyErr_SetString`` to set a Python exception or ``ex(string)`` to set
the python exception to a custom exception type (see below).

To declare a custom Python exception type, declare a ``py::exception`` variable
and use this in the associated exception translator (note: it is often useful
to make this a static declaration when using it inside a lambda expression
without requiring capturing).

The following example demonstrates this for two hypothetical exception classes
``MyCustomException`` and ``OtherException``: the first is translated to a
custom python exception ``MyCustomError``, while the second is translated to a
standard python ``RuntimeError``:

.. code-block:: cpp

    static py::exception<MyCustomException> exc(m, "MyCustomError");
    py::register_exception_translator([](std::exception_ptr p) {
        try {
            if (p) std::rethrow_exception(p);
        } catch (const MyCustomException &e) {
            exc(e.what());
        } catch (const OtherException &e) {
            PyErr_SetString(PyExc_RuntimeError, e.what());
        }
    });

Multiple exceptions can be handled by a single translator, as shown in the
example above. If the exception is not caught by the current translator, the
previously registered one gets a chance.

If none of the registered exception translators is able to handle the
exception, it is handled by the default converter as described in the previous
section.

.. seealso::

    The file :file:`tests/test_exceptions.cpp` contains examples
    of various custom exception translators and custom exception types.

.. note::

    Call either ``PyErr_SetString`` or a custom exception's call
    operator (``exc(string)``) for every exception caught in a custom exception
    translator. Failure to do so will cause Python to crash with ``SystemError:
    error return without exception set``.

    Exceptions that you do not plan to handle should simply not be caught, or
    may be explicitly (re-)thrown to delegate it to the other,
    previously-declared existing exception translators.

    Note that ``libc++`` and ``libstdc++`` `behave differently <https://stackoverflow.com/questions/19496643/using-clang-fvisibility-hidden-and-typeinfo-and-type-erasure/28827430>`_
    with ``-fvisibility=hidden``. Therefore exceptions that are used across ABI
    boundaries need to be explicitly exported, as exercised in
    ``tests/test_exceptions.h``. See also: "Problems with C++ exceptions" under
    `GCC Wiki <https://gcc.gnu.org/wiki/Visibility>`_.


Local vs Global Exception Translators
=====================================

When a global exception translator is registered, it will be applied across all
modules in the reverse order of registration. This can create behavior where the
order of module import influences how exceptions are translated.

If module1 has the following translator:

.. code-block:: cpp

    py::register_exception_translator([](std::exception_ptr p) {
        try {
            if (p) std::rethrow_exception(p);
        } catch (const std::invalid_argument &e) {
            PyErr_SetString(PyExc_ValueError, "module1 handled this");
        }
    });

and module2 has the following similar translator:

.. code-block:: cpp

    py::register_exception_translator([](std::exception_ptr p) {
        try {
            if (p) std::rethrow_exception(p);
        } catch (const std::invalid_argument &e) {
            PyErr_SetString(PyExc_ValueError, "module2 handled this");
        }
    });

then which translator handles the invalid_argument will be determined by the
order that module1 and module2 are imported. Since exception translators are
applied in the reverse order of registration, whichever module was imported
last will "win" and that translator will be applied.

If there are multiple pybind11 modules that share exception types (either
standard built-in or custom) loaded into a single python instance and
consistent error handling behavior is needed, then local translators should be
used.

Changing the previous example to use ``register_local_exception_translator``
would mean that when invalid_argument is thrown in the module2 code, the
module2 translator will always handle it, while in module1, the module1
translator will do the same.
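For completeness, switching the example to a local translator only changes the
registration call (a sketch; the choice of ``PyExc_ValueError`` is ours):

.. code-block:: cpp

    // Inside module1's PYBIND11_MODULE block; only this module's functions
    // will use this translator.
    py::register_local_exception_translator([](std::exception_ptr p) {
        try {
            if (p) std::rethrow_exception(p);
        } catch (const std::invalid_argument &e) {
            PyErr_SetString(PyExc_ValueError, e.what());
        }
    });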
.. _handling_python_exceptions_cpp:

Handling exceptions from Python in C++
======================================

When C++ calls Python functions, such as in a callback function or when
manipulating Python objects, and Python raises an ``Exception``, pybind11
converts the Python exception into a C++ exception of type
:class:`pybind11::error_already_set` whose payload contains a C++ string textual
summary and the actual Python exception. ``error_already_set`` is used to
propagate Python exceptions back to Python (or possibly, handle them in C++).

.. tabularcolumns:: |p{0.5\textwidth}|p{0.45\textwidth}|

+--------------------------------------+--------------------------------------+
| Exception raised in Python           | Thrown as C++ exception type         |
+======================================+======================================+
| Any Python ``Exception``             | :class:`pybind11::error_already_set` |
+--------------------------------------+--------------------------------------+

For example:

.. code-block:: cpp

    try {
        // open("missing.txt", "r")
        auto file = py::module_::import("io").attr("open")("missing.txt", "r");
        auto text = file.attr("read")();
        file.attr("close")();
    } catch (py::error_already_set &e) {
        if (e.matches(PyExc_FileNotFoundError)) {
            py::print("missing.txt not found");
        } else if (e.matches(PyExc_PermissionError)) {
            py::print("missing.txt found but not accessible");
        } else {
            throw;
        }
    }

Note that C++ to Python exception translation does not apply here, since that is
a method for translating C++ exceptions to Python, not vice versa. The error raised
from Python is always ``error_already_set``.

This example illustrates this behavior:

.. code-block:: cpp

    try {
        py::exec("raise ValueError('The Ring')");
    } catch (py::value_error &boromir) {
        // Boromir never gets the ring
        assert(false);
    } catch (py::error_already_set &frodo) {
        // Frodo gets the ring
        py::print("I will take the ring");
    }

    try {
        // py::value_error is a request for pybind11 to raise a Python exception
        throw py::value_error("The ball");
    } catch (py::error_already_set &cat) {
        // cat won't catch the ball since
        // py::value_error is not a Python exception
        assert(false);
    } catch (py::value_error &dog) {
        // dog will catch the ball
        py::print("Run Spot run");
        throw; // Throw it again (pybind11 will raise ValueError)
    }

Handling errors from the Python C API
=====================================

Where possible, use :ref:`pybind11 wrappers <wrappers>` instead of calling
the Python C API directly. When calling the Python C API directly, in
addition to manually managing reference counts, one must follow the pybind11
error protocol, which is outlined here.

After calling the Python C API, if Python returns an error,
``throw py::error_already_set();``, which allows pybind11 to deal with the
exception and pass it back to the Python interpreter. This includes calls to
the error setting functions such as ``PyErr_SetString``.

.. code-block:: cpp

    PyErr_SetString(PyExc_TypeError, "C API type error demo");
    throw py::error_already_set();

    // But it would be easier to simply...
    throw py::type_error("pybind11 wrapper type error");

Alternately, to ignore the error, call `PyErr_Clear
<https://docs.python.org/3/c-api/exceptions.html#c.PyErr_Clear>`_.

Any Python error must be thrown or cleared, or Python/pybind11 will be left in
an invalid state.
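The protocol in practice might look like this (a sketch of our own;
``PyLong_AsLong`` is just one example of a C API call that reports failure
through the error indicator):

.. code-block:: cpp

    long as_long(py::handle obj) {
        long value = PyLong_AsLong(obj.ptr());
        if (value == -1 && PyErr_Occurred())
            throw py::error_already_set();  // hand the pending error to pybind11
        return value;
    }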
+ +Chaining exceptions ('raise from') +================================== + +Python has a mechanism for indicating that exceptions were caused by other +exceptions: + +.. code-block:: py + + try: + print(1 / 0) + except Exception as exc: + raise RuntimeError("could not divide by zero") from exc + +To do a similar thing in pybind11, you can use the ``py::raise_from`` function. It +sets the current python error indicator, so to continue propagating the exception +you should ``throw py::error_already_set()``. + +.. code-block:: cpp + + try { + py::eval("print(1 / 0")); + } catch (py::error_already_set &e) { + py::raise_from(e, PyExc_RuntimeError, "could not divide by zero"); + throw py::error_already_set(); + } + +.. versionadded:: 2.8 + +.. _unraisable_exceptions: + +Handling unraisable exceptions +============================== + +If a Python function invoked from a C++ destructor or any function marked +``noexcept(true)`` (collectively, "noexcept functions") throws an exception, there +is no way to propagate the exception, as such functions may not throw. +Should they throw or fail to catch any exceptions in their call graph, +the C++ runtime calls ``std::terminate()`` to abort immediately. + +Similarly, Python exceptions raised in a class's ``__del__`` method do not +propagate, but are logged by Python as an unraisable error. In Python 3.8+, a +`system hook is triggered +`_ +and an auditing event is logged. + +Any noexcept function should have a try-catch block that traps +class:`error_already_set` (or any other exception that can occur). Note that +pybind11 wrappers around Python exceptions such as +:class:`pybind11::value_error` are *not* Python exceptions; they are C++ +exceptions that pybind11 catches and converts to Python exceptions. Noexcept +functions cannot propagate these exceptions either. A useful approach is to +convert them to Python exceptions and then ``discard_as_unraisable`` as shown +below. + +.. code-block:: cpp + + void nonthrowing_func() noexcept(true) { + try { + // ... + } catch (py::error_already_set &eas) { + // Discard the Python error using Python APIs, using the C++ magic + // variable __func__. Python already knows the type and value and of the + // exception object. + eas.discard_as_unraisable(__func__); + } catch (const std::exception &e) { + // Log and discard C++ exceptions. + third_party::log(e); + } + } + +.. versionadded:: 2.6 diff --git a/third_party/pybind11/docs/advanced/functions.rst b/third_party/pybind11/docs/advanced/functions.rst new file mode 100644 index 0000000000..69e3d8a1df --- /dev/null +++ b/third_party/pybind11/docs/advanced/functions.rst @@ -0,0 +1,614 @@ +Functions +######### + +Before proceeding with this section, make sure that you are already familiar +with the basics of binding functions and classes, as explained in :doc:`/basics` +and :doc:`/classes`. The following guide is applicable to both free and member +functions, i.e. *methods* in Python. + +.. _return_value_policies: + +Return value policies +===================== + +Python and C++ use fundamentally different ways of managing the memory and +lifetime of objects managed by them. This can lead to issues when creating +bindings for functions that return a non-trivial type. Just by looking at the +type information, it is not clear whether Python should take charge of the +returned value and eventually free its resources, or if this is handled on the +C++ side. 
For this reason, pybind11 provides a several *return value policy* +annotations that can be passed to the :func:`module_::def` and +:func:`class_::def` functions. The default policy is +:enum:`return_value_policy::automatic`. + +Return value policies are tricky, and it's very important to get them right. +Just to illustrate what can go wrong, consider the following simple example: + +.. code-block:: cpp + + /* Function declaration */ + Data *get_data() { return _data; /* (pointer to a static data structure) */ } + ... + + /* Binding code */ + m.def("get_data", &get_data); // <-- KABOOM, will cause crash when called from Python + +What's going on here? When ``get_data()`` is called from Python, the return +value (a native C++ type) must be wrapped to turn it into a usable Python type. +In this case, the default return value policy (:enum:`return_value_policy::automatic`) +causes pybind11 to assume ownership of the static ``_data`` instance. + +When Python's garbage collector eventually deletes the Python +wrapper, pybind11 will also attempt to delete the C++ instance (via ``operator +delete()``) due to the implied ownership. At this point, the entire application +will come crashing down, though errors could also be more subtle and involve +silent data corruption. + +In the above example, the policy :enum:`return_value_policy::reference` should have +been specified so that the global data instance is only *referenced* without any +implied transfer of ownership, i.e.: + +.. code-block:: cpp + + m.def("get_data", &get_data, py::return_value_policy::reference); + +On the other hand, this is not the right policy for many other situations, +where ignoring ownership could lead to resource leaks. +As a developer using pybind11, it's important to be familiar with the different +return value policies, including which situation calls for which one of them. +The following table provides an overview of available policies: + +.. tabularcolumns:: |p{0.5\textwidth}|p{0.45\textwidth}| + ++--------------------------------------------------+----------------------------------------------------------------------------+ +| Return value policy | Description | ++==================================================+============================================================================+ +| :enum:`return_value_policy::take_ownership` | Reference an existing object (i.e. do not create a new copy) and take | +| | ownership. Python will call the destructor and delete operator when the | +| | object's reference count reaches zero. Undefined behavior ensues when the | +| | C++ side does the same, or when the data was not dynamically allocated. | ++--------------------------------------------------+----------------------------------------------------------------------------+ +| :enum:`return_value_policy::copy` | Create a new copy of the returned object, which will be owned by Python. | +| | This policy is comparably safe because the lifetimes of the two instances | +| | are decoupled. | ++--------------------------------------------------+----------------------------------------------------------------------------+ +| :enum:`return_value_policy::move` | Use ``std::move`` to move the return value contents into a new instance | +| | that will be owned by Python. 
This policy is comparably safe because the | +| | lifetimes of the two instances (move source and destination) are decoupled.| ++--------------------------------------------------+----------------------------------------------------------------------------+ +| :enum:`return_value_policy::reference` | Reference an existing object, but do not take ownership. The C++ side is | +| | responsible for managing the object's lifetime and deallocating it when | +| | it is no longer used. Warning: undefined behavior will ensue when the C++ | +| | side deletes an object that is still referenced and used by Python. | ++--------------------------------------------------+----------------------------------------------------------------------------+ +| :enum:`return_value_policy::reference_internal` | Indicates that the lifetime of the return value is tied to the lifetime | +| | of a parent object, namely the implicit ``this``, or ``self`` argument of | +| | the called method or property. Internally, this policy works just like | +| | :enum:`return_value_policy::reference` but additionally applies a | +| | ``keep_alive<0, 1>`` *call policy* (described in the next section) that | +| | prevents the parent object from being garbage collected as long as the | +| | return value is referenced by Python. This is the default policy for | +| | property getters created via ``def_property``, ``def_readwrite``, etc. | ++--------------------------------------------------+----------------------------------------------------------------------------+ +| :enum:`return_value_policy::automatic` | This policy falls back to the policy | +| | :enum:`return_value_policy::take_ownership` when the return value is a | +| | pointer. Otherwise, it uses :enum:`return_value_policy::move` or | +| | :enum:`return_value_policy::copy` for rvalue and lvalue references, | +| | respectively. See above for a description of what all of these different | +| | policies do. This is the default policy for ``py::class_``-wrapped types. | ++--------------------------------------------------+----------------------------------------------------------------------------+ +| :enum:`return_value_policy::automatic_reference` | As above, but use policy :enum:`return_value_policy::reference` when the | +| | return value is a pointer. This is the default conversion policy for | +| | function arguments when calling Python functions manually from C++ code | +| | (i.e. via ``handle::operator()``) and the casters in ``pybind11/stl.h``. | +| | You probably won't need to use this explicitly. | ++--------------------------------------------------+----------------------------------------------------------------------------+ + +Return value policies can also be applied to properties: + +.. code-block:: cpp + + class_(m, "MyClass") + .def_property("data", &MyClass::getData, &MyClass::setData, + py::return_value_policy::copy); + +Technically, the code above applies the policy to both the getter and the +setter function, however, the setter doesn't really care about *return* +value policies which makes this a convenient terse syntax. Alternatively, +targeted arguments can be passed through the :class:`cpp_function` constructor: + +.. code-block:: cpp + + class_(m, "MyClass") + .def_property("data", + py::cpp_function(&MyClass::getData, py::return_value_policy::copy), + py::cpp_function(&MyClass::setData) + ); + +.. 
.. warning:: + + Code with invalid return value policies might access uninitialized memory or + free data structures multiple times, which can lead to hard-to-debug + non-determinism and segmentation faults, hence it is worth spending the + time to understand all the different options in the table above. + +.. note:: + + One important aspect of the above policies is that they only apply to + instances which pybind11 has *not* seen before, in which case the policy + clarifies essential questions about the return value's lifetime and + ownership. When pybind11 knows the instance already (as identified by its + type and address in memory), it will return the existing Python object + wrapper rather than creating a new copy. + +.. note:: + + The next section on :ref:`call_policies` discusses *call policies* that can be + specified *in addition* to a return value policy from the list above. Call + policies indicate reference relationships that can involve both return values + and parameters of functions. + +.. note:: + + As an alternative to elaborate call policies and lifetime management logic, + consider using smart pointers (see the section on :ref:`smart_pointers` for + details). Smart pointers can tell whether an object is still referenced from + C++ or Python, which generally eliminates the kinds of inconsistencies that + can lead to crashes or undefined behavior. For functions returning smart + pointers, it is not necessary to specify a return value policy. + +.. _call_policies: + +Additional call policies +======================== + +In addition to the above return value policies, further *call policies* can be +specified to indicate dependencies between parameters or ensure a certain state +for the function call. + +Keep alive +---------- + +In general, this policy is required when the C++ object is any kind of container +and another object is being added to the container. ``keep_alive<Nurse, Patient>`` +indicates that the argument with index ``Patient`` should be kept alive at least +until the argument with index ``Nurse`` is freed by the garbage collector. Argument +indices start at one, while zero refers to the return value. For methods, index +``1`` refers to the implicit ``this`` pointer, while regular arguments begin at +index ``2``. Arbitrarily many call policies can be specified. When a ``Nurse`` +with value ``None`` is detected at runtime, the call policy does nothing. + +When the nurse is not a pybind11-registered type, the implementation internally +relies on the ability to create a *weak reference* to the nurse object. When +the nurse object is not a pybind11-registered type and does not support weak +references, an exception will be thrown. + +If you use an incorrect argument index, you will get a ``RuntimeError`` saying +``Could not activate keep_alive!``. You should review the indices you're using. + +Consider the following example: here, the binding code for a list append +operation ties the lifetime of the newly added element to the underlying +container: + +.. code-block:: cpp + + py::class_<List>(m, "List") + .def("append", &List::append, py::keep_alive<1, 2>()); + +For consistency, the argument indexing is identical for constructors. Index +``1`` still refers to the implicit ``this`` pointer, i.e. the object which is +being constructed. Index ``0`` refers to the return type which is presumed to +be ``void`` when a constructor is viewed like a function. The following example +ties the lifetime of the constructor element to the constructed object: + +..
code-block:: cpp + + py::class_<Nurse>(m, "Nurse") + .def(py::init<int>(), py::keep_alive<1, 2>()); + +.. note:: + + ``keep_alive`` is analogous to the ``with_custodian_and_ward`` (if Nurse, + Patient != 0) and ``with_custodian_and_ward_postcall`` (if Nurse/Patient == + 0) policies from Boost.Python. + +Call guard +---------- + +The ``call_guard<T>`` policy allows any scope guard type ``T`` to be placed +around the function call. For example, this definition: + +.. code-block:: cpp + + m.def("foo", foo, py::call_guard<T>()); + +is equivalent to the following pseudocode: + +.. code-block:: cpp + + m.def("foo", [](args...) { + T scope_guard; + return foo(args...); // forwarded arguments + }); + +The only requirement is that ``T`` is default-constructible, but otherwise any +scope guard will work. This is very useful in combination with ``gil_scoped_release``. +See :ref:`gil`. + +Multiple guards can also be specified as ``py::call_guard<T1, T2, T3...>``. The +constructor order is left to right and destruction happens in reverse. + +.. seealso:: + + The file :file:`tests/test_call_policies.cpp` contains a complete example + that demonstrates using `keep_alive` and `call_guard` in more detail. + +.. _python_objects_as_args: + +Python objects as arguments +=========================== + +pybind11 exposes all major Python types using thin C++ wrapper classes. These +wrapper classes can also be used as parameters of functions in bindings, which +makes it possible to directly work with native Python types on the C++ side. +For instance, the following statement iterates over a Python ``dict``: + +.. code-block:: cpp + + void print_dict(const py::dict& dict) { + /* Easily interact with Python types */ + for (auto item : dict) + std::cout << "key=" << std::string(py::str(item.first)) << ", " + << "value=" << std::string(py::str(item.second)) << std::endl; + } + +It can be exported: + +.. code-block:: cpp + + m.def("print_dict", &print_dict); + +And used in Python as usual: + +.. code-block:: pycon + + >>> print_dict({"foo": 123, "bar": "hello"}) + key=foo, value=123 + key=bar, value=hello + +For more information on using Python objects in C++, see :doc:`/advanced/pycpp/index`. + +Accepting \*args and \*\*kwargs +=============================== + +Python provides a useful mechanism to define functions that accept arbitrary +numbers of arguments and keyword arguments: + +.. code-block:: python + + def generic(*args, **kwargs): + ... # do something with args and kwargs + +Such functions can also be created using pybind11: + +.. code-block:: cpp + + void generic(py::args args, const py::kwargs& kwargs) { + /// .. do something with args + if (kwargs) + /// .. do something with kwargs + } + + /// Binding code + m.def("generic", &generic); + +The class ``py::args`` derives from ``py::tuple`` and ``py::kwargs`` derives +from ``py::dict``. + +You may also use just one or the other, and may combine these with other +arguments. Note, however, that ``py::kwargs`` must always be the last argument +of the function, and ``py::args`` implies that any further arguments are +keyword-only (see :ref:`keyword_only_arguments`). + +Please refer to the other examples for details on how to iterate over these, +and on how to cast their entries into C++ objects (a short sketch also follows +below). A demonstration is also +available in ``tests/test_kwargs_and_defaults.cpp``. + +.. note:: + + When combining \*args or \*\*kwargs with :ref:`keyword_args` you should + *not* include ``py::arg`` tags for the ``py::args`` and ``py::kwargs`` + arguments.
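+ +As a brief illustration (a sketch, not from the original text; the ``sum_args`` binding is made up), the entries can be iterated and cast like the ``py::tuple`` and ``py::dict`` they derive from: + +.. code-block:: cpp + + m.def("sum_args", [](py::args args, const py::kwargs& kwargs) { + int total = 0; + for (auto item : args) // iterate like a py::tuple + total += py::cast<int>(item); // cast each entry to a C++ int + if (kwargs.contains("offset")) // optional keyword argument + total += kwargs["offset"].cast<int>(); + return total; + }); + +Calling ``sum_args(1, 2, 3, offset=10)`` from Python would then return ``16``.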
+ +Default arguments revisited +=========================== + +The section on :ref:`default_args` previously discussed basic usage of default +arguments using pybind11. One noteworthy aspect of their implementation is that +default arguments are converted to Python objects right at declaration time. +Consider the following example: + +.. code-block:: cpp + + py::class_<MyClass>("MyClass") + .def("myFunction", py::arg("arg") = SomeType(123)); + +In this case, pybind11 must already be set up to deal with values of the type +``SomeType`` (via a prior instantiation of ``py::class_<SomeType>``), or an +exception will be thrown. + +Another aspect worth highlighting is that the "preview" of the default argument +in the function signature is generated using the object's ``__repr__`` method. +If not available, the signature may not be very helpful, e.g.: + +.. code-block:: pycon + + FUNCTIONS + ... + | myFunction(...) + | Signature : (MyClass, arg : SomeType = <SomeType object at 0x101b7b080>) -> NoneType + ... + +The first way of addressing this is by defining ``SomeType.__repr__``. +Alternatively, it is possible to specify the human-readable preview of the +default argument manually using the ``arg_v`` notation: + +.. code-block:: cpp + + py::class_<MyClass>("MyClass") + .def("myFunction", py::arg_v("arg", SomeType(123), "SomeType(123)")); + +Sometimes it may be necessary to pass a null pointer value as a default +argument. In this case, remember to cast it to the underlying type in question, +like so: + +.. code-block:: cpp + + py::class_<MyClass>("MyClass") + .def("myFunction", py::arg("arg") = static_cast<SomeType *>(nullptr)); + +.. _keyword_only_arguments: + +Keyword-only arguments +====================== + +Python implements keyword-only arguments by specifying an unnamed ``*`` +argument in a function definition: + +.. code-block:: python + + def f(a, *, b): # a can be positional or via keyword; b must be via keyword + pass + + + f(a=1, b=2) # good + f(b=2, a=1) # good + f(1, b=2) # good + f(1, 2) # TypeError: f() takes 1 positional argument but 2 were given + +Pybind11 provides a ``py::kw_only`` object that allows you to implement +the same behaviour by specifying the object between positional and keyword-only +argument annotations when registering the function: + +.. code-block:: cpp + + m.def("f", [](int a, int b) { /* ... */ }, + py::arg("a"), py::kw_only(), py::arg("b")); + +.. versionadded:: 2.6 + +A ``py::args`` argument implies that any following arguments are keyword-only, +as if ``py::kw_only()`` had been specified in the same relative location of the +argument list as the ``py::args`` argument. The ``py::kw_only()`` may be +included to be explicit about this, but is not required. + +.. versionchanged:: 2.9 + This can now be combined with ``py::args``. Before, ``py::args`` could only + occur at the end of the argument list, or immediately before a ``py::kwargs`` + argument at the end. + + +Positional-only arguments +========================= + +Python 3.8 introduced a new positional-only argument syntax, using ``/`` in the +function definition (note that this has been a convention for CPython +positional arguments, such as in ``pow()``, since Python 2). You can +do the same thing in any version of Python using ``py::pos_only()``: + +.. code-block:: cpp + + m.def("f", [](int a, int b) { /* ... */ }, + py::arg("a"), py::pos_only(), py::arg("b")); + +You now cannot give argument ``a`` by keyword. This can be combined with +keyword-only arguments, as well. + +.. versionadded:: 2.6
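+ +As a combined sketch (not from the original text; the lambda and argument names are illustrative), ``py::pos_only()`` and ``py::kw_only()`` can appear in the same signature: ``a`` is positional-only, ``b`` may be passed either way, and ``c`` is keyword-only: + +.. code-block:: cpp + + m.def("f", [](int a, int b, int c) { return a + b + c; }, + py::arg("a"), py::pos_only(), + py::arg("b"), + py::kw_only(), py::arg("c")); +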
.. _nonconverting_arguments: + +Non-converting arguments +======================== + +Certain argument types may support conversion from one type to another. Some +examples of conversions are: + +* :ref:`implicit_conversions` declared using ``py::implicitly_convertible<A,B>()`` +* Calling a method accepting a double with an integer argument +* Calling a ``std::complex<float>`` argument with a non-complex python type + (for example, with a float). (Requires the optional ``pybind11/complex.h`` + header). +* Calling a function taking an Eigen matrix reference with a numpy array of the + wrong type or of an incompatible data layout. (Requires the optional + ``pybind11/eigen.h`` header). + +This behaviour is sometimes undesirable: the binding code may prefer to raise +an error rather than convert the argument. This behaviour can be obtained +through ``py::arg`` by calling the ``.noconvert()`` method of the ``py::arg`` +object, such as: + +.. code-block:: cpp + + m.def("floats_only", [](double f) { return 0.5 * f; }, py::arg("f").noconvert()); + m.def("floats_preferred", [](double f) { return 0.5 * f; }, py::arg("f")); + +Attempting to call the second function (the one without ``.noconvert()``) with +an integer will succeed, but attempting to call the ``.noconvert()`` version +will fail with a ``TypeError``: + +.. code-block:: pycon + + >>> floats_preferred(4) + 2.0 + >>> floats_only(4) + Traceback (most recent call last): + File "<stdin>", line 1, in <module> + TypeError: floats_only(): incompatible function arguments. The following argument types are supported: + 1. (f: float) -> float + + Invoked with: 4 + +You may, of course, combine this with the :var:`_a` shorthand notation (see +:ref:`keyword_args`) and/or :ref:`default_args`. It is also permitted to omit +the argument name by using the ``py::arg()`` constructor without an argument +name, i.e. by specifying ``py::arg().noconvert()``. + +.. note:: + + When specifying ``py::arg`` options it is necessary to provide the same + number of options as the bound function has arguments. Thus if you want to + enable no-convert behaviour for just one of several arguments, you will + need to specify a ``py::arg()`` annotation for each argument with the + no-convert argument modified to ``py::arg().noconvert()``. + +.. _none_arguments: + +Allowing/Prohibiting None arguments +=================================== + +When a C++ type registered with :class:`py::class_` is passed as an argument to +a function taking the instance as pointer or shared holder (e.g. ``shared_ptr<type>`` +or a custom, copyable holder as described in :ref:`smart_pointers`), pybind +allows ``None`` to be passed from Python which results in calling the C++ +function with ``nullptr`` (or an empty holder) for the argument. + +To explicitly enable or disable this behaviour, use the +``.none`` method of the :class:`py::arg` object: + +.. code-block:: cpp + + py::class_<Dog>(m, "Dog").def(py::init<>()); + py::class_<Cat>(m, "Cat").def(py::init<>()); + m.def("bark", [](Dog *dog) -> std::string { + if (dog) return "woof!"; /* Called with a Dog instance */ + else return "(no dog)"; /* Called with None, dog == nullptr */ + }, py::arg("dog").none(true)); + m.def("meow", [](Cat *cat) -> std::string { + // Can't be called with None argument + return "meow"; + }, py::arg("cat").none(false)); + +With the above, the Python call ``bark(None)`` will return the string ``"(no +dog)"``, while attempting to call ``meow(None)`` will raise a ``TypeError``: + +..
code-block:: pycon + + >>> from animals import Dog, Cat, bark, meow + >>> bark(Dog()) + 'woof!' + >>> meow(Cat()) + 'meow' + >>> bark(None) + '(no dog)' + >>> meow(None) + Traceback (most recent call last): + File "<stdin>", line 1, in <module> + TypeError: meow(): incompatible function arguments. The following argument types are supported: + 1. (cat: animals.Cat) -> str + + Invoked with: None + +The default behaviour when the tag is unspecified is to allow ``None``. + +.. note:: + + Even when ``.none(true)`` is specified for an argument, ``None`` will be converted to a + ``nullptr`` *only* for custom and :ref:`opaque <opaque>` types. Pointers to built-in types + (``double *``, ``int *``, ...) and STL types (``std::vector<T> *``, ...; if ``pybind11/stl.h`` + is included) are copied when converted to C++ (see :doc:`/advanced/cast/overview`) and will + not allow ``None`` as argument. To pass optional arguments of these copied types, consider + using ``std::optional<T>``. + +.. _overload_resolution: + +Overload resolution order +========================= + +When a function or method with multiple overloads is called from Python, +pybind11 determines which overload to call in two passes. The first pass +attempts to call each overload without allowing argument conversion (as if +every argument had been specified as ``py::arg().noconvert()`` as described +above). + +If no overload succeeds in the no-conversion first pass, a second pass is +attempted in which argument conversion is allowed (except where prohibited via +an explicit ``py::arg().noconvert()`` attribute in the function definition). + +If the second pass also fails, a ``TypeError`` is raised. + +Within each pass, overloads are tried in the order they were registered with +pybind11. If the ``py::prepend()`` tag is added to the definition, a function +can be placed at the beginning of the overload sequence instead, allowing user +overloads to precede built-in functions. + +What this means in practice is that pybind11 will prefer any overload that does +not require conversion of arguments to an overload that does, but otherwise +prefers earlier-defined overloads to later-defined ones. + +.. note:: + + pybind11 does *not* further prioritize based on the number/pattern of + overloaded arguments. That is, pybind11 does not prioritize a function + requiring one conversion over one requiring three, but only prioritizes + overloads requiring no conversion at all to overloads that require + conversion of at least one argument. + +.. versionadded:: 2.6 + + The ``py::prepend()`` tag. + +Binding functions with template parameters +========================================== + +You can bind functions that have template parameters. Here's a function: + +.. code-block:: cpp + + template <typename T> + void set(T t); + +C++ templates cannot be instantiated at runtime, so you cannot bind the +non-instantiated function: + +.. code-block:: cpp + + // BROKEN (this will not compile) + m.def("set", &set); + +You must bind each instantiated function template separately. You may bind +each instantiation with the same name, which will be treated the same as +an overloaded function: + +.. code-block:: cpp + + m.def("set", &set<int>); + m.def("set", &set<std::string>); + +Sometimes it's clearer to bind them with separate names, which is also +an option: + +..
code-block:: cpp + + m.def("setInt", &set<int>); + m.def("setString", &set<std::string>); diff --git a/third_party/pybind11/docs/advanced/misc.rst b/third_party/pybind11/docs/advanced/misc.rst new file mode 100644 index 0000000000..edab15fcb7 --- /dev/null +++ b/third_party/pybind11/docs/advanced/misc.rst @@ -0,0 +1,337 @@ +Miscellaneous +############# + +.. _macro_notes: + +General notes regarding convenience macros +========================================== + +pybind11 provides a few convenience macros such as +:func:`PYBIND11_DECLARE_HOLDER_TYPE` and ``PYBIND11_OVERRIDE_*``. Since these +are "just" macros that are evaluated in the preprocessor (which has no concept +of types), they *will* get confused by commas in a template argument; for +example, consider: + +.. code-block:: cpp + + PYBIND11_OVERRIDE(MyReturnType<T1, T2>, Class<T3>, func) + +The C preprocessor interprets this as five arguments (with new +arguments beginning after each comma) rather than three. To get around this, +there are two alternatives: you can use a type alias, or you can wrap the type +using the ``PYBIND11_TYPE`` macro: + +.. code-block:: cpp + + // Version 1: using a type alias + using ReturnType = MyReturnType<T1, T2>; + using ClassType = Class<T3>; + PYBIND11_OVERRIDE(ReturnType, ClassType, func); + + // Version 2: using the PYBIND11_TYPE macro: + PYBIND11_OVERRIDE(PYBIND11_TYPE(MyReturnType<T1, T2>), + PYBIND11_TYPE(Class<T3>), func) + +The ``PYBIND11_MAKE_OPAQUE`` macro does *not* require the above workarounds. + +.. _gil: + +Global Interpreter Lock (GIL) +============================= + +When calling a C++ function from Python, the GIL is always held. +The classes :class:`gil_scoped_release` and :class:`gil_scoped_acquire` can be +used to acquire and release the global interpreter lock in the body of a C++ +function call. In this way, long-running C++ code can be parallelized using +multiple Python threads. Taking :ref:`overriding_virtuals` as an example, this +could be realized as follows (important changes highlighted): + +.. code-block:: cpp + :emphasize-lines: 8,9,31,32 + + class PyAnimal : public Animal { + public: + /* Inherit the constructors */ + using Animal::Animal; + + /* Trampoline (need one for each virtual function) */ + std::string go(int n_times) override { + /* Acquire GIL before calling Python code */ + py::gil_scoped_acquire acquire; + + PYBIND11_OVERRIDE_PURE( + std::string, /* Return type */ + Animal, /* Parent class */ + go, /* Name of function */ + n_times /* Argument(s) */ + ); + } + }; + + PYBIND11_MODULE(example, m) { + py::class_<Animal, PyAnimal> animal(m, "Animal"); + animal + .def(py::init<>()) + .def("go", &Animal::go); + + py::class_<Dog>(m, "Dog", animal) + .def(py::init<>()); + + m.def("call_go", [](Animal *animal) -> std::string { + /* Release GIL before calling into (potentially long-running) C++ code */ + py::gil_scoped_release release; + return call_go(animal); + }); + } + +The ``call_go`` wrapper can also be simplified using the ``call_guard`` policy +(see :ref:`call_policies`) which yields the same result: + +.. code-block:: cpp + + m.def("call_go", &call_go, py::call_guard<py::gil_scoped_release>()); + + +Binding sequence data types, iterators, the slicing protocol, etc. +================================================================== + +Please refer to the supplemental example for details. + +..
seealso:: + + The file :file:`tests/test_sequences_and_iterators.cpp` contains a + complete example that shows how to bind a sequence data type, including + length queries (``__len__``), iterators (``__iter__``), the slicing + protocol and other kinds of useful operations. + + +Partitioning code over multiple extension modules +================================================= + +It's straightforward to split binding code over multiple extension modules, +while referencing types that are declared elsewhere. Everything "just" works +without any special precautions. One exception to this rule occurs when +extending a type declared in another extension module. Recall the basic example +from Section :ref:`inheritance`. + +.. code-block:: cpp + + py::class_<Pet> pet(m, "Pet"); + pet.def(py::init<const std::string &>()) + .def_readwrite("name", &Pet::name); + + py::class_<Dog>(m, "Dog", pet /* <- specify parent */) + .def(py::init<const std::string &>()) + .def("bark", &Dog::bark); + +Suppose now that ``Pet`` bindings are defined in a module named ``basic``, +whereas the ``Dog`` bindings are defined somewhere else. The challenge is of +course that the variable ``pet`` is not available anymore though it is needed +to indicate the inheritance relationship to the constructor of ``class_``. +However, it can be acquired as follows: + +.. code-block:: cpp + + py::object pet = (py::object) py::module_::import("basic").attr("Pet"); + + py::class_<Dog>(m, "Dog", pet) + .def(py::init<const std::string &>()) + .def("bark", &Dog::bark); + +Alternatively, you can specify the base class as a template parameter option to +``class_``, which performs an automated lookup of the corresponding Python +type. Like the above code, however, this also requires invoking the ``import`` +function once to ensure that the pybind11 binding code of the module ``basic`` +has been executed: + +.. code-block:: cpp + + py::module_::import("basic"); + + py::class_<Dog, Pet>(m, "Dog") + .def(py::init<const std::string &>()) + .def("bark", &Dog::bark); + +Naturally, both methods will fail when there are cyclic dependencies. + +Note that pybind11 code compiled with hidden-by-default symbol visibility (e.g. +via the command line flag ``-fvisibility=hidden`` on GCC/Clang), which is +required for proper pybind11 functionality, can interfere with the ability to +access types defined in another extension module. Working around this requires +manually exporting types that are accessed by multiple extension modules; +pybind11 provides a macro to do just this: + +.. code-block:: cpp + + class PYBIND11_EXPORT Dog : public Animal { + ... + }; + +Note also that it is possible (although it would rarely be required) to share arbitrary +C++ objects between extension modules at runtime. Internal library data is shared +between modules using capsule machinery [#f6]_ which can also be utilized for +storing, modifying and accessing user-defined data. Note that an extension module +will "see" other extensions' data if and only if they were built with the same +pybind11 version. Consider the following example: + +.. code-block:: cpp + + auto data = reinterpret_cast<MyData *>(py::get_shared_data("mydata")); + if (!data) + data = static_cast<MyData *>(py::set_shared_data("mydata", new MyData(42))); + +If the above snippet was used in several separately compiled extension modules, +the first one to be imported would create a ``MyData`` instance and associate +a ``"mydata"`` key with a pointer to it. Extensions that are imported later +would then be able to access the data behind the same pointer. + +..
[#f6] https://docs.python.org/3/extending/extending.html#using-capsules + +Module Destructors +================== + +pybind11 does not provide an explicit mechanism to invoke cleanup code at +module destruction time. In rare cases where such functionality is required, it +is possible to emulate it using Python capsules or weak references with a +destruction callback. + +.. code-block:: cpp + + auto cleanup_callback = []() { + // perform cleanup here -- this function is called with the GIL held + }; + + m.add_object("_cleanup", py::capsule(cleanup_callback)); + +This approach has the potential downside that instances of classes exposed +within the module may still be alive when the cleanup callback is invoked +(whether this is acceptable will generally depend on the application). + +Alternatively, the capsule may also be stashed within a type object, which +ensures that it is not called before all instances of that type have been +collected: + +.. code-block:: cpp + + auto cleanup_callback = []() { /* ... */ }; + m.attr("BaseClass").attr("_cleanup") = py::capsule(cleanup_callback); + +Both approaches also expose a potentially dangerous ``_cleanup`` attribute in +Python, which may be undesirable from an API standpoint (a premature explicit +call from Python might lead to undefined behavior). Yet another approach that +avoids this issue involves a weak reference with a cleanup callback: + +.. code-block:: cpp + + // Register a callback function that is invoked when the BaseClass object is collected + py::cpp_function cleanup_callback( + [](py::handle weakref) { + // perform cleanup here -- this function is called with the GIL held + + weakref.dec_ref(); // release weak reference + } + ); + + // Create a weak reference with a cleanup callback and initially leak it + (void) py::weakref(m.attr("BaseClass"), cleanup_callback).release(); + +.. note:: + + PyPy does not garbage collect objects when the interpreter exits. An alternative + approach (which also works on CPython) is to use the :py:mod:`atexit` module [#f7]_, + for example: + + .. code-block:: cpp + + auto atexit = py::module_::import("atexit"); + atexit.attr("register")(py::cpp_function([]() { + // perform cleanup here -- this function is called with the GIL held + })); + + .. [#f7] https://docs.python.org/3/library/atexit.html + + +Generating documentation using Sphinx +===================================== + +Sphinx [#f4]_ has the ability to inspect the signatures and documentation +strings in pybind11-based extension modules to automatically generate beautiful +documentation in a variety of formats. The python_example repository [#f5]_ contains a +simple example which uses this approach. + +There are two potential gotchas when using this approach: first, make sure that +the resulting strings do not contain any :kbd:`TAB` characters, which break the +docstring parsing routines. You may want to use C++11 raw string literals, +which are convenient for multi-line comments. Conveniently, any excess +indentation will automatically be removed by Sphinx. However, for this to +work, it is important that all lines are indented consistently, i.e.: + +..
code-block:: cpp + + // ok + m.def("foo", &foo, R"mydelimiter( + The foo function + + Parameters + ---------- + )mydelimiter"); + + // *not ok* + m.def("foo", &foo, R"mydelimiter(The foo function + + Parameters + ---------- + )mydelimiter"); + +By default, pybind11 automatically generates and prepends a signature to the docstring of a function +registered with ``module_::def()`` and ``class_::def()``. Sometimes this +behavior is not desirable, because you want to provide your own signature or remove +the docstring completely to exclude the function from the Sphinx documentation. +The class ``options`` allows you to selectively suppress auto-generated signatures: + +.. code-block:: cpp + + PYBIND11_MODULE(example, m) { + py::options options; + options.disable_function_signatures(); + + m.def("add", [](int a, int b) { return a + b; }, "A function which adds two numbers"); + } + +Note that changes to the settings affect only function bindings created during the +lifetime of the ``options`` instance. When it goes out of scope at the end of the module's init function, +the default settings are restored to prevent unwanted side effects. + +.. [#f4] http://www.sphinx-doc.org +.. [#f5] http://github.com/pybind/python_example + +.. _avoiding-cpp-types-in-docstrings: + +Avoiding C++ types in docstrings +================================ + +Docstrings are generated at the time of the declaration, e.g. when ``.def(...)`` is called. +At this point parameter and return types should be known to pybind11. +If a custom type is not exposed yet through a ``py::class_`` constructor or a custom type caster, +its C++ type name will be used instead to generate the signature in the docstring: + +.. code-block:: text + + | __init__(...) + | __init__(self: example.Foo, arg0: ns::Bar) -> None + ^^^^^^^ + + +This limitation can be circumvented by ensuring that C++ classes are registered with pybind11 +before they are used as a parameter or return type of a function: + +.. code-block:: cpp + + PYBIND11_MODULE(example, m) { + + auto pyFoo = py::class_<ns::Foo>(m, "Foo"); + auto pyBar = py::class_<ns::Bar>(m, "Bar"); + + pyFoo.def(py::init<const ns::Bar&>()); + pyBar.def(py::init<>()); + } diff --git a/third_party/pybind11/docs/advanced/pycpp/index.rst b/third_party/pybind11/docs/advanced/pycpp/index.rst new file mode 100644 index 0000000000..6885bdcff1 --- /dev/null +++ b/third_party/pybind11/docs/advanced/pycpp/index.rst @@ -0,0 +1,13 @@ +Python C++ interface +#################### + +pybind11 exposes Python types and functions using thin C++ wrappers, which +makes it possible to conveniently call Python code from C++ without resorting +to Python's C API. + +.. toctree:: + :maxdepth: 2 + + object + numpy + utilities diff --git a/third_party/pybind11/docs/advanced/pycpp/numpy.rst b/third_party/pybind11/docs/advanced/pycpp/numpy.rst new file mode 100644 index 0000000000..b6ef019ed0 --- /dev/null +++ b/third_party/pybind11/docs/advanced/pycpp/numpy.rst @@ -0,0 +1,455 @@ +.. _numpy: + +NumPy +##### + +Buffer protocol +=============== + +Python supports an extremely general and convenient approach for exchanging +data between plugin libraries. Types can expose a buffer view [#f2]_, which +provides fast direct access to the raw internal data representation. Suppose we +want to bind the following simplistic Matrix class: + +..
code-block:: cpp + + class Matrix { + public: + Matrix(size_t rows, size_t cols) : m_rows(rows), m_cols(cols) { + m_data = new float[rows*cols]; + } + float *data() { return m_data; } + size_t rows() const { return m_rows; } + size_t cols() const { return m_cols; } + private: + size_t m_rows, m_cols; + float *m_data; + }; + +The following binding code exposes the ``Matrix`` contents as a buffer object, +making it possible to cast Matrices into NumPy arrays. It is even possible to +completely avoid copy operations with Python expressions like +``np.array(matrix_instance, copy = False)``. + +.. code-block:: cpp + + py::class_<Matrix>(m, "Matrix", py::buffer_protocol()) + .def_buffer([](Matrix &m) -> py::buffer_info { + return py::buffer_info( + m.data(), /* Pointer to buffer */ + sizeof(float), /* Size of one scalar */ + py::format_descriptor<float>::format(), /* Python struct-style format descriptor */ + 2, /* Number of dimensions */ + { m.rows(), m.cols() }, /* Buffer dimensions */ + { sizeof(float) * m.cols(), /* Strides (in bytes) for each index */ + sizeof(float) } + ); + }); + +Supporting the buffer protocol in a new type involves specifying the special +``py::buffer_protocol()`` tag in the ``py::class_`` constructor and calling the +``def_buffer()`` method with a lambda function that creates a +``py::buffer_info`` description record on demand describing a given matrix +instance. The contents of ``py::buffer_info`` mirror the Python buffer protocol +specification. + +.. code-block:: cpp + + struct buffer_info { + void *ptr; + py::ssize_t itemsize; + std::string format; + py::ssize_t ndim; + std::vector<py::ssize_t> shape; + std::vector<py::ssize_t> strides; + }; + +To create a C++ function that can take a Python buffer object as an argument, +simply use the type ``py::buffer`` as one of its arguments. Buffers can exist +in a great variety of configurations, hence some safety checks are usually +necessary in the function body. Below, you can see a basic example on how to +define a custom constructor for the Eigen double precision matrix +(``Eigen::MatrixXd``) type, which supports initialization from compatible +buffer objects (e.g. a NumPy matrix). + +.. code-block:: cpp + + /* Bind MatrixXd (or some other Eigen type) to Python */ + typedef Eigen::MatrixXd Matrix; + + typedef Matrix::Scalar Scalar; + constexpr bool rowMajor = Matrix::Flags & Eigen::RowMajorBit; + + py::class_<Matrix>(m, "Matrix", py::buffer_protocol()) + .def(py::init([](py::buffer b) { + typedef Eigen::Stride<Eigen::Dynamic, Eigen::Dynamic> Strides; + + /* Request a buffer descriptor from Python */ + py::buffer_info info = b.request(); + + /* Some basic validation checks ... */ + if (info.format != py::format_descriptor<Scalar>::format()) + throw std::runtime_error("Incompatible format: expected a double array!"); + + if (info.ndim != 2) + throw std::runtime_error("Incompatible buffer dimension!"); + + auto strides = Strides( + info.strides[rowMajor ? 0 : 1] / (py::ssize_t)sizeof(Scalar), + info.strides[rowMajor ? 1 : 0] / (py::ssize_t)sizeof(Scalar)); + + auto map = Eigen::Map<Matrix, 0, Strides>( + static_cast<Scalar *>(info.ptr), info.shape[0], info.shape[1], strides); + + return Matrix(map); + })); + +For reference, the ``def_buffer()`` call for this Eigen data type should look +as follows: + +..
code-block:: cpp + + .def_buffer([](Matrix &m) -> py::buffer_info { + return py::buffer_info( + m.data(), /* Pointer to buffer */ + sizeof(Scalar), /* Size of one scalar */ + py::format_descriptor<Scalar>::format(), /* Python struct-style format descriptor */ + 2, /* Number of dimensions */ + { m.rows(), m.cols() }, /* Buffer dimensions */ + { sizeof(Scalar) * (rowMajor ? m.cols() : 1), + sizeof(Scalar) * (rowMajor ? 1 : m.rows()) } + /* Strides (in bytes) for each index */ + ); + }) + +For a much easier approach of binding Eigen types (although with some +limitations), refer to the section on :doc:`/advanced/cast/eigen`. + +.. seealso:: + + The file :file:`tests/test_buffers.cpp` contains a complete example + that demonstrates using the buffer protocol with pybind11 in more detail. + +.. [#f2] http://docs.python.org/3/c-api/buffer.html + +Arrays +====== + +By exchanging ``py::buffer`` with ``py::array`` in the above snippet, we can +restrict the function so that it only accepts NumPy arrays (rather than any +type of Python object satisfying the buffer protocol). + +In many situations, we want to define a function which only accepts a NumPy +array of a certain data type. This is possible via the ``py::array_t<T>`` +template. For instance, the following function requires the argument to be a +NumPy array containing double precision values. + +.. code-block:: cpp + + void f(py::array_t<double> array); + +When it is invoked with a different type (e.g. an integer or a list of +integers), the binding code will attempt to cast the input into a NumPy array +of the requested type. This feature requires the :file:`pybind11/numpy.h` +header to be included. Note that :file:`pybind11/numpy.h` does not depend on +the NumPy headers, and thus can be used without declaring a build-time +dependency on NumPy; NumPy>=1.7.0 is a runtime dependency. + +Data in NumPy arrays is not guaranteed to be packed in a dense manner; +furthermore, entries can be separated by arbitrary column and row strides. +Sometimes, it can be useful to require a function to only accept dense arrays +using either the C (row-major) or Fortran (column-major) ordering. This can be +accomplished via a second template argument with values ``py::array::c_style`` +or ``py::array::f_style``. + +.. code-block:: cpp + + void f(py::array_t<double, py::array::c_style | py::array::forcecast> array); + +The ``py::array::forcecast`` argument is the default value of the second +template parameter, and it ensures that non-conforming arguments are converted +into an array satisfying the specified requirements instead of trying the next +function overload. + +There are several methods on arrays; the methods listed below under references +work, as well as the following functions based on the NumPy API: + +- ``.dtype()`` returns the type of the contained values. + +- ``.strides()`` returns a pointer to the strides of the array (optionally pass + an integer axis to get a number). + +- ``.flags()`` returns the flag settings. ``.writable()`` and ``.owndata()`` + are directly available. + +- ``.offset_at()`` returns the offset (optionally pass indices). + +- ``.squeeze()`` returns a view with length-1 axes removed. + +- ``.view(dtype)`` returns a view of the array with a different dtype. + +- ``.reshape({i, j, ...})`` returns a view of the array with a different shape. + ``.resize({...})`` is also available. + +- ``.index_at(i, j, ...)`` gets the count from the beginning to a given index. + + +There are also several methods for getting references (described below).
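+ +As a small hedged sketch tying these pieces together (not part of the original text; the ``describe`` binding is made up), a dense ``double`` array can be accepted and inspected with a few of the methods above: + +.. code-block:: cpp + + m.def("describe", [](py::array_t<double, py::array::c_style | py::array::forcecast> a) { + py::list info; + info.append(a.ndim()); // number of dimensions + info.append(a.size()); // total number of elements + info.append(a.nbytes()); // total number of bytes + return info; + }); + +Calling ``describe(np.ones((2, 3)))`` from Python would then yield ``[2, 6, 48]``.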
+ +Structured types +================ + +In order for ``py::array_t`` to work with structured (record) types, we first +need to register the memory layout of the type. This can be done via the +``PYBIND11_NUMPY_DTYPE`` macro, called in the plugin definition code, which +expects the type followed by field names: + +.. code-block:: cpp + + struct A { + int x; + double y; + }; + + struct B { + int z; + A a; + }; + + // ... + PYBIND11_MODULE(test, m) { + // ... + + PYBIND11_NUMPY_DTYPE(A, x, y); + PYBIND11_NUMPY_DTYPE(B, z, a); + /* now both A and B can be used as template arguments to py::array_t */ + } + +The structure should consist of fundamental arithmetic types, ``std::complex``, +previously registered substructures, and arrays of any of the above. Both C++ +arrays and ``std::array`` are supported. While there is a static assertion to +prevent many types of unsupported structures, it is still the user's +responsibility to use only "plain" structures that can be safely manipulated as +raw memory without violating invariants. + +Vectorizing functions +===================== + +Suppose we want to bind a function with the following signature to Python so +that it can process arbitrary NumPy array arguments (vectors, matrices, general +N-D arrays) in addition to its normal arguments: + +.. code-block:: cpp + + double my_func(int x, float y, double z); + +After including the ``pybind11/numpy.h`` header, this is extremely simple: + +.. code-block:: cpp + + m.def("vectorized_func", py::vectorize(my_func)); + +Invoking the function as below causes 4 calls to be made to ``my_func`` with +each of the array elements. The significant advantage of this compared to +solutions like ``numpy.vectorize()`` is that the loop over the elements runs +entirely on the C++ side and can be crunched down into a tight, optimized loop +by the compiler. The result is returned as a NumPy array of type +``numpy.dtype.float64``. + +.. code-block:: pycon + + >>> x = np.array([[1, 3], [5, 7]]) + >>> y = np.array([[2, 4], [6, 8]]) + >>> z = 3 + >>> result = vectorized_func(x, y, z) + +The scalar argument ``z`` is transparently replicated 4 times. The input +arrays ``x`` and ``y`` are automatically converted into the right types (they +are of type ``numpy.dtype.int64`` but need to be ``numpy.dtype.int32`` and +``numpy.dtype.float32``, respectively). + +.. note:: + + Only arithmetic, complex, and POD types passed by value or by ``const &`` + reference are vectorized; all other arguments are passed through as-is. + Functions taking rvalue reference arguments cannot be vectorized. + +In cases where the computation is too complicated to be reduced to +``vectorize``, it will be necessary to create and access the buffer contents +manually. The following snippet contains a complete example that shows how this +works (the code is somewhat contrived, since it could have been done more +simply using ``vectorize``). + +..
code-block:: cpp + + #include <pybind11/pybind11.h> + #include <pybind11/numpy.h> + + namespace py = pybind11; + + py::array_t<double> add_arrays(py::array_t<double> input1, py::array_t<double> input2) { + py::buffer_info buf1 = input1.request(), buf2 = input2.request(); + + if (buf1.ndim != 1 || buf2.ndim != 1) + throw std::runtime_error("Number of dimensions must be one"); + + if (buf1.size != buf2.size) + throw std::runtime_error("Input shapes must match"); + + /* No pointer is passed, so NumPy will allocate the buffer */ + auto result = py::array_t<double>(buf1.size); + + py::buffer_info buf3 = result.request(); + + double *ptr1 = static_cast<double *>(buf1.ptr); + double *ptr2 = static_cast<double *>(buf2.ptr); + double *ptr3 = static_cast<double *>(buf3.ptr); + + for (size_t idx = 0; idx < buf1.shape[0]; idx++) + ptr3[idx] = ptr1[idx] + ptr2[idx]; + + return result; + } + + PYBIND11_MODULE(test, m) { + m.def("add_arrays", &add_arrays, "Add two NumPy arrays"); + } + +.. seealso:: + + The file :file:`tests/test_numpy_vectorize.cpp` contains a complete + example that demonstrates using :func:`vectorize` in more detail. + +Direct access +============= + +For performance reasons, particularly when dealing with very large arrays, it +is often desirable to directly access array elements without internal checking +of dimensions and bounds on every access when indices are known to be already +valid. To avoid such checks, the ``array`` class and ``array_t<T>`` template +class offer an unchecked proxy object that can be used for this unchecked +access through the ``unchecked<N>`` and ``mutable_unchecked<N>`` methods, +where ``N`` gives the required dimensionality of the array: + +.. code-block:: cpp + + m.def("sum_3d", [](py::array_t<double> x) { + auto r = x.unchecked<3>(); // x must have ndim = 3; can be non-writeable + double sum = 0; + for (py::ssize_t i = 0; i < r.shape(0); i++) + for (py::ssize_t j = 0; j < r.shape(1); j++) + for (py::ssize_t k = 0; k < r.shape(2); k++) + sum += r(i, j, k); + return sum; + }); + m.def("increment_3d", [](py::array_t<double> x) { + auto r = x.mutable_unchecked<3>(); // Will throw if ndim != 3 or flags.writeable is false + for (py::ssize_t i = 0; i < r.shape(0); i++) + for (py::ssize_t j = 0; j < r.shape(1); j++) + for (py::ssize_t k = 0; k < r.shape(2); k++) + r(i, j, k) += 1.0; + }, py::arg().noconvert()); + +To obtain the proxy from an ``array`` object, you must specify both the data +type and number of dimensions as template arguments, such as ``auto r = +myarray.mutable_unchecked<float, 2>()``. + +If the number of dimensions is not known at compile time, you can omit the +dimensions template parameter (i.e. calling ``arr_t.unchecked()`` or +``arr.unchecked<T>()``). This will give you a proxy object that works in the +same way, but results in less optimizable code and thus a small efficiency +loss in tight loops. + +Note that the returned proxy object directly references the array's data, and +only reads its shape, strides, and writeable flag when constructed. You must +take care to ensure that the referenced array is not destroyed or reshaped for +the duration of the returned object, typically by limiting the scope of the +returned instance. + +The returned proxy object supports some of the same methods as ``py::array`` so +that it can be used as a drop-in replacement for some existing, index-checked +uses of ``py::array``: + +- ``.ndim()`` returns the number of dimensions + +- ``.data(1, 2, ...)`` and ``r.mutable_data(1, 2, ...)`` return a pointer to + the ``const T`` or ``T`` data, respectively, at the given indices.
The + latter is only available to proxies obtained via ``a.mutable_unchecked()``. + +- ``.itemsize()`` returns the size of an item in bytes, i.e. ``sizeof(T)``. + +- ``.shape(n)`` returns the size of dimension ``n`` + +- ``.size()`` returns the total number of elements (i.e. the product of the shapes). + +- ``.nbytes()`` returns the number of bytes used by the referenced elements + (i.e. ``itemsize()`` times ``size()``). + +.. seealso:: + + The file :file:`tests/test_numpy_array.cpp` contains additional examples + demonstrating the use of this feature. + +Ellipsis +======== + +Python provides a convenient ``...`` ellipsis notation that is often used to +slice multidimensional arrays. For instance, the following snippet extracts the +middle dimensions of a tensor with the first and last index set to zero. + +.. code-block:: python + + a = ... # a NumPy array + b = a[0, ..., 0] + +The ``py::ellipsis()`` function can be used to perform the same +operation on the C++ side: + +.. code-block:: cpp + + py::array a = /* A NumPy array */; + py::array b = a[py::make_tuple(0, py::ellipsis(), 0)]; + + +Memory view +=========== + +For a case when we simply want to provide a direct accessor to a C/C++ buffer +without a concrete class object, we can return a ``memoryview`` object. Suppose +we wish to expose a ``memoryview`` for a 2x4 ``uint8_t`` array; we can do the +following: + +.. code-block:: cpp + + const uint8_t buffer[] = { + 0, 1, 2, 3, + 4, 5, 6, 7 + }; + m.def("get_memoryview2d", []() { + return py::memoryview::from_buffer( + buffer, // buffer pointer + { 2, 4 }, // shape (rows, cols) + { sizeof(uint8_t) * 4, sizeof(uint8_t) } // strides in bytes + ); + }); + +This approach is meant for providing a ``memoryview`` for a C/C++ buffer not +managed by Python. The user is responsible for managing the lifetime of the +buffer. Using a ``memoryview`` created in this way after deleting the buffer on the +C++ side results in undefined behavior. + +We can also use ``memoryview::from_memory`` for a simple 1D contiguous buffer: + +.. code-block:: cpp + + m.def("get_memoryview1d", []() { + return py::memoryview::from_memory( + buffer, // buffer pointer + sizeof(uint8_t) * 8 // buffer size + ); + }); + +.. versionchanged:: 2.6 + ``memoryview::from_memory`` added. diff --git a/third_party/pybind11/docs/advanced/pycpp/object.rst b/third_party/pybind11/docs/advanced/pycpp/object.rst new file mode 100644 index 0000000000..93e1a94d8f --- /dev/null +++ b/third_party/pybind11/docs/advanced/pycpp/object.rst @@ -0,0 +1,286 @@ +Python types +############ + +.. _wrappers: + +Available wrappers +================== + +All major Python types are available as thin C++ wrapper classes. These +can also be used as function parameters -- see :ref:`python_objects_as_args`. + +Available types include :class:`handle`, :class:`object`, :class:`bool_`, +:class:`int_`, :class:`float_`, :class:`str`, :class:`bytes`, :class:`tuple`, +:class:`list`, :class:`dict`, :class:`slice`, :class:`none`, :class:`capsule`, +:class:`iterable`, :class:`iterator`, :class:`function`, :class:`buffer`, +:class:`array`, and :class:`array_t`. + +.. warning:: + + Be sure to review the :ref:`pytypes_gotchas` before using this heavily in + your C++ API. + +.. _instantiating_compound_types: + +Instantiating compound Python types from C++ +============================================ + +Dictionaries can be initialized in the :class:`dict` constructor: + +..
code-block:: cpp + + using namespace pybind11::literals; // to bring in the `_a` literal + py::dict d("spam"_a=py::none(), "eggs"_a=42); + +A tuple of Python objects can be instantiated using :func:`py::make_tuple`: + +.. code-block:: cpp + + py::tuple tup = py::make_tuple(42, py::none(), "spam"); + +Each element is converted to a supported Python type. + +A `simple namespace`_ can be instantiated using + +.. code-block:: cpp + + using namespace pybind11::literals; // to bring in the `_a` literal + py::object SimpleNamespace = py::module_::import("types").attr("SimpleNamespace"); + py::object ns = SimpleNamespace("spam"_a=py::none(), "eggs"_a=42); + +Attributes on a namespace can be modified with the :func:`py::delattr`, +:func:`py::getattr`, and :func:`py::setattr` functions. Simple namespaces can +be useful as lightweight stand-ins for class instances. + +.. _simple namespace: https://docs.python.org/3/library/types.html#types.SimpleNamespace + +.. _casting_back_and_forth: + +Casting back and forth +====================== + +In this kind of mixed code, it is often necessary to convert arbitrary C++ +types to Python, which can be done using :func:`py::cast`: + +.. code-block:: cpp + + MyClass *cls = ...; + py::object obj = py::cast(cls); + +The reverse direction uses the following syntax: + +.. code-block:: cpp + + py::object obj = ...; + MyClass *cls = obj.cast<MyClass *>(); + +When conversion fails, both directions throw the exception :class:`cast_error`. + +.. _python_libs: + +Accessing Python libraries from C++ +=================================== + +It is also possible to import objects defined in the Python standard +library or available in the current Python environment (``sys.path``) and work +with these in C++. + +This example obtains a reference to the Python ``Decimal`` class. + +.. code-block:: cpp + + // Equivalent to "from decimal import Decimal" + py::object Decimal = py::module_::import("decimal").attr("Decimal"); + +.. code-block:: cpp + + // Try to import scipy + py::object scipy = py::module_::import("scipy"); + return scipy.attr("__version__"); + + +.. _calling_python_functions: + +Calling Python functions +======================== + +It is also possible to call Python classes, functions and methods +via ``operator()``. + +.. code-block:: cpp + + // Construct a Python object of class Decimal + py::object pi = Decimal("3.14159"); + +.. code-block:: cpp + + // Use Python to make our directories + py::object os = py::module_::import("os"); + py::object makedirs = os.attr("makedirs"); + makedirs("/tmp/path/to/somewhere"); + +One can convert the result obtained from Python to a pure C++ version +if a ``py::class_`` or type conversion is defined. + +.. code-block:: cpp + + py::function f = <...>; + py::object result_py = f(1234, "hello", some_instance); + MyClass &result = result_py.cast<MyClass>(); + +.. _calling_python_methods: + +Calling Python methods +======================== + +To call an object's method, one can again use ``.attr`` to obtain access to the +Python method. + +.. code-block:: cpp + + // Calculate e^π in decimal + py::object exp_pi = pi.attr("exp")(); + py::print(py::str(exp_pi)); + +In the example above ``pi.attr("exp")`` is a *bound method*: it will always call +the method for that same instance of the class. Alternately one can create an +*unbound method* via the Python class (instead of instance) and pass the ``self`` +object explicitly, followed by other arguments. + +..
code-block:: cpp + + py::object decimal_exp = Decimal.attr("exp"); + + // Compute e^n for n=0..4 + for (int n = 0; n < 5; n++) { + py::print(decimal_exp(Decimal(n))); + } + +Keyword arguments +================= + +Keyword arguments are also supported. In Python, there is the usual call syntax: + +.. code-block:: python + + def f(number, say, to): + ... # function code + + + f(1234, say="hello", to=some_instance) # keyword call in Python + +In C++, the same call can be made using: + +.. code-block:: cpp + + using namespace pybind11::literals; // to bring in the `_a` literal + f(1234, "say"_a="hello", "to"_a=some_instance); // keyword call in C++ + +Unpacking arguments +=================== + +Unpacking of ``*args`` and ``**kwargs`` is also possible and can be mixed with +other arguments: + +.. code-block:: cpp + + // * unpacking + py::tuple args = py::make_tuple(1234, "hello", some_instance); + f(*args); + + // ** unpacking + py::dict kwargs = py::dict("number"_a=1234, "say"_a="hello", "to"_a=some_instance); + f(**kwargs); + + // mixed keywords, * and ** unpacking + py::tuple args = py::make_tuple(1234); + py::dict kwargs = py::dict("to"_a=some_instance); + f(*args, "say"_a="hello", **kwargs); + +Generalized unpacking according to PEP448_ is also supported: + +.. code-block:: cpp + + py::dict kwargs1 = py::dict("number"_a=1234); + py::dict kwargs2 = py::dict("to"_a=some_instance); + f(**kwargs1, "say"_a="hello", **kwargs2); + +.. seealso:: + + The file :file:`tests/test_pytypes.cpp` contains a complete + example that demonstrates passing native Python types in more detail. The + file :file:`tests/test_callbacks.cpp` presents a few examples of calling + Python functions from C++, including keyword arguments and unpacking. + +.. _PEP448: https://www.python.org/dev/peps/pep-0448/ + +.. _implicit_casting: + +Implicit casting +================ + +When using the C++ interface for Python types, or calling Python functions, +objects of type :class:`object` are returned. It is possible to invoke implicit +conversions to subclasses like :class:`dict`. The same holds for the proxy objects +returned by ``operator[]`` or ``obj.attr()``. +Casting to subtypes improves code readability and allows values to be passed to +C++ functions that require a specific subtype rather than a generic :class:`object`. + +.. code-block:: cpp + + #include <pybind11/numpy.h> + using namespace pybind11::literals; + + py::module_ os = py::module_::import("os"); + py::module_ path = py::module_::import("os.path"); // like 'import os.path as path' + py::module_ np = py::module_::import("numpy"); // like 'import numpy as np' + + py::str curdir_abs = path.attr("abspath")(path.attr("curdir")); + py::print(py::str("Current directory: ") + curdir_abs); + py::dict environ = os.attr("environ"); + py::print(environ["HOME"]); + py::array_t<float> arr = np.attr("ones")(3, "dtype"_a="float32"); + py::print(py::repr(arr + py::int_(1))); + +These implicit conversions are available for subclasses of :class:`object`; there +is no need to call ``obj.cast()`` explicitly as for custom classes, see +:ref:`casting_back_and_forth`. + +.. note:: + If a trivial conversion via move constructor is not possible, both implicit and + explicit casting (calling ``obj.cast()``) will attempt a "rich" conversion. + For instance, ``py::list env = os.attr("environ");`` will succeed and is + equivalent to the Python code ``env = list(os.environ)`` that produces a + list of the dict keys. + +..
TODO: Adapt text once PR #2349 has landed + +Handling exceptions +=================== + +Python exceptions from wrapper classes will be thrown as a ``py::error_already_set``. +See :ref:`Handling exceptions from Python in C++ +<handling_python_exceptions_cpp>` for more information on handling exceptions +raised when calling C++ wrapper classes. + +.. _pytypes_gotchas: + +Gotchas +======= + +Default-Constructed Wrappers +---------------------------- + +When a wrapper type is default-constructed, it is **not** a valid Python object (i.e. it is not ``py::none()``). It is simply the same as a +``PyObject*`` null pointer. To check for this, use +``static_cast<bool>(my_wrapper)``. + +Assigning py::none() to wrappers +-------------------------------- + +You may be tempted to use types like ``py::str`` and ``py::dict`` in C++ +signatures (either pure C++, or in bound signatures), and assign them default +values of ``py::none()``. However, in a best case scenario, it will fail fast +because ``None`` is not convertible to that type (e.g. ``py::dict``), or in a +worse case scenario, it will silently work but corrupt the types you want to +work with (e.g. ``py::str(py::none())`` will yield ``"None"`` in Python). diff --git a/third_party/pybind11/docs/advanced/pycpp/utilities.rst b/third_party/pybind11/docs/advanced/pycpp/utilities.rst new file mode 100644 index 0000000000..af0f9cb2b0 --- /dev/null +++ b/third_party/pybind11/docs/advanced/pycpp/utilities.rst @@ -0,0 +1,155 @@ +Utilities +######### + +Using Python's print function in C++ +==================================== + +The usual way to write output in C++ is using ``std::cout`` while in Python one +would use ``print``. Since these methods use different buffers, mixing them can +lead to output order issues. To resolve this, pybind11 modules can use the +:func:`py::print` function which writes to Python's ``sys.stdout`` for consistency. + +Python's ``print`` function is replicated in the C++ API including optional +keyword arguments ``sep``, ``end``, ``file``, ``flush``. Everything works as +expected in Python: + +.. code-block:: cpp + + py::print(1, 2.0, "three"); // 1 2.0 three + py::print(1, 2.0, "three", "sep"_a="-"); // 1-2.0-three + + auto args = py::make_tuple("unpacked", true); + py::print("->", *args, "end"_a="<-"); // -> unpacked True <- + +.. _ostream_redirect: + +Capturing standard output from ostream +====================================== + +Often, a library will use the streams ``std::cout`` and ``std::cerr`` to print, +but this does not play well with Python's standard ``sys.stdout`` and ``sys.stderr`` +redirection. Replacing a library's printing with ``py::print`` may not +be feasible. This can be fixed using a guard around the library function that +redirects output to the corresponding Python streams: + +.. code-block:: cpp + + #include <pybind11/iostream.h> + + ... + + // Add a scoped redirect for your noisy code + m.def("noisy_func", []() { + py::scoped_ostream_redirect stream( + std::cout, // std::ostream& + py::module_::import("sys").attr("stdout") // Python output + ); + call_noisy_func(); + }); + +.. warning:: + + The implementation in ``pybind11/iostream.h`` is NOT thread safe. Multiple + threads writing to a redirected ostream concurrently cause data races + and potentially buffer overflows. Therefore it is currently a requirement + that all (possibly) concurrent redirected ostream writes are protected by + a mutex. #HelpAppreciated: Work on iostream.h thread safety. For more + background see the discussions under + `PR #2982 <https://github.com/pybind/pybind11/pull/2982>`_ and + `PR #2995 <https://github.com/pybind/pybind11/pull/2995>`_.
diff --git a/third_party/pybind11/docs/advanced/pycpp/utilities.rst b/third_party/pybind11/docs/advanced/pycpp/utilities.rst
new file mode 100644
index 0000000000..af0f9cb2b0
--- /dev/null
+++ b/third_party/pybind11/docs/advanced/pycpp/utilities.rst
@@ -0,0 +1,155 @@
+Utilities
+#########
+
+Using Python's print function in C++
+====================================
+
+The usual way to write output in C++ is using ``std::cout`` while in Python one
+would use ``print``. Since these methods use different buffers, mixing them can
+lead to output order issues. To resolve this, pybind11 modules can use the
+:func:`py::print` function which writes to Python's ``sys.stdout`` for consistency.
+
+Python's ``print`` function is replicated in the C++ API including optional
+keyword arguments ``sep``, ``end``, ``file``, ``flush``. Everything works as
+expected in Python:
+
+.. code-block:: cpp
+
+    py::print(1, 2.0, "three"); // 1 2.0 three
+    py::print(1, 2.0, "three", "sep"_a="-"); // 1-2.0-three
+
+    auto args = py::make_tuple("unpacked", true);
+    py::print("->", *args, "end"_a="<-"); // -> unpacked True <-
+
+.. _ostream_redirect:
+
+Capturing standard output from ostream
+======================================
+
+Often, a library will use the streams ``std::cout`` and ``std::cerr`` to print,
+but this does not play well with Python's standard ``sys.stdout`` and ``sys.stderr``
+redirection. Replacing a library's printing with ``py::print`` may not
+be feasible. This can be fixed using a guard around the library function that
+redirects output to the corresponding Python streams:
+
+.. code-block:: cpp
+
+    #include <pybind11/iostream.h>
+
+    ...
+
+    // Add a scoped redirect for your noisy code
+    m.def("noisy_func", []() {
+        py::scoped_ostream_redirect stream(
+            std::cout,                                 // std::ostream&
+            py::module_::import("sys").attr("stdout")  // Python output
+        );
+        call_noisy_func();
+    });
+
+.. warning::
+
+    The implementation in ``pybind11/iostream.h`` is NOT thread safe. Multiple
+    threads writing to a redirected ostream concurrently cause data races
+    and potentially buffer overflows. Therefore it is currently a requirement
+    that all (possibly) concurrent redirected ostream writes are protected by
+    a mutex. #HelpAppreciated: Work on iostream.h thread safety. For more
+    background see the discussions under
+    `PR #2982 <https://github.com/pybind/pybind11/pull/2982>`_ and
+    `PR #2995 <https://github.com/pybind/pybind11/pull/2995>`_.
+
+This method respects flushes on the output streams and will flush if needed
+when the scoped guard is destroyed. This allows the output to be redirected in
+real time, such as to a Jupyter notebook. The two arguments, the C++ stream and
+the Python output, are optional, and default to standard output if not given. An
+extra type, ``py::scoped_estream_redirect``, is identical
+except for defaulting to ``std::cerr`` and ``sys.stderr``; this can be useful with
+``py::call_guard``, which allows multiple items, but uses the default constructor:
+
+.. code-block:: cpp
+
+    // Alternative: Call single function using call guard
+    m.def("noisy_func", &call_noisy_function,
+          py::call_guard<py::scoped_ostream_redirect,
+                         py::scoped_estream_redirect>());
+
+The redirection can also be done in Python with the addition of a context
+manager, using the ``py::add_ostream_redirect()`` function:
+
+.. code-block:: cpp
+
+    py::add_ostream_redirect(m, "ostream_redirect");
+
+The name in Python defaults to ``ostream_redirect`` if no name is passed. This
+creates the following context manager in Python:
+
+.. code-block:: python
+
+    with ostream_redirect(stdout=True, stderr=True):
+        noisy_function()
+
+It defaults to redirecting both streams, though you can use the keyword
+arguments to disable one of the streams if needed.
+
+.. note::
+
+    The above methods will not redirect C-level output to file descriptors, such
+    as ``fprintf``. For those cases, you'll need to redirect the file
+    descriptors either directly in C or with Python's ``os.dup2`` function
+    in an operating-system dependent way.
+
+.. _eval:
+
+Evaluating Python expressions from strings and files
+====================================================
+
+pybind11 provides the ``eval``, ``exec`` and ``eval_file`` functions to evaluate
+Python expressions and statements. The following example illustrates how they
+can be used.
+
+.. code-block:: cpp
+
+    // At beginning of file
+    #include <pybind11/eval.h>
+
+    ...
+
+    // Evaluate in scope of main module
+    py::object scope = py::module_::import("__main__").attr("__dict__");
+
+    // Evaluate an isolated expression
+    int result = py::eval("my_variable + 10", scope).cast<int>();
+
+    // Evaluate a sequence of statements
+    py::exec(
+        "print('Hello')\n"
+        "print('world!');",
+        scope);
+
+    // Evaluate the statements in a separate Python file on disk
+    py::eval_file("script.py", scope);
+
+C++11 raw string literals are also supported and quite handy for this purpose.
+The only requirement is that the first statement must be on a new line following
+the raw string delimiter ``R"(``, ensuring all lines have common leading indent:
+
+.. code-block:: cpp
+
+    py::exec(R"(
+        x = get_answer()
+        if x == 42:
+            print('Hello World!')
+        else:
+            print('Bye!')
+        )", scope
+    );
+
+.. note::
+
+    ``eval`` and ``eval_file`` accept a template parameter that describes how the
+    string/file should be interpreted. Possible choices include ``eval_expr``
+    (isolated expression), ``eval_single_statement`` (a single statement, return
+    value is always ``none``), and ``eval_statements`` (sequence of statements,
+    return value is always ``none``). ``eval`` defaults to ``eval_expr``,
+    ``eval_file`` defaults to ``eval_statements`` and ``exec`` is just a shortcut
+    for ``eval<eval_statements>``.
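+As a minimal sketch of these modes (assuming ``scope`` is defined as above):
+
+.. code-block:: cpp
+
+    // Isolated expression (the default for py::eval)
+    int x = py::eval<py::eval_expr>("2 + 3", scope).cast<int>();
+
+    // A single statement; the return value is always none
+    py::eval<py::eval_single_statement>("y = 5", scope);
+
+    // A sequence of statements; the return value is always none
+    py::eval<py::eval_statements>("a = 1\nb = a + 1", scope);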
diff --git a/third_party/pybind11/docs/advanced/smart_ptrs.rst b/third_party/pybind11/docs/advanced/smart_ptrs.rst
new file mode 100644
index 0000000000..5a22201095
--- /dev/null
+++ b/third_party/pybind11/docs/advanced/smart_ptrs.rst
@@ -0,0 +1,174 @@
+Smart pointers
+##############
+
+std::unique_ptr
+===============
+
+Given a class ``Example`` with Python bindings, it's possible to return
+instances wrapped in C++11 unique pointers, like so
+
+.. code-block:: cpp
+
+    std::unique_ptr<Example> create_example() { return std::unique_ptr<Example>(new Example()); }
+
+.. code-block:: cpp
+
+    m.def("create_example", &create_example);
+
+In other words, there is nothing special that needs to be done. While returning
+unique pointers in this way is allowed, it is *illegal* to use them as function
+arguments. For instance, the following function signature cannot be processed
+by pybind11.
+
+.. code-block:: cpp
+
+    void do_something_with_example(std::unique_ptr<Example> ex) { ... }
+
+The above signature would imply that Python needs to give up ownership of an
+object that is passed to this function, which is generally not possible (for
+instance, the object might be referenced elsewhere).
+
+std::shared_ptr
+===============
+
+The binding generator for classes, :class:`class_`, can be passed a template
+type that denotes a special *holder* type that is used to manage references to
+the object. If no such holder type template argument is given, the default for
+a type named ``Type`` is ``std::unique_ptr<Type>``, which means that the object
+is deallocated when Python's reference count goes to zero.
+
+It is possible to switch to other types of reference counting wrappers or smart
+pointers, which is useful in codebases that rely on them. For instance, the
+following snippet causes ``std::shared_ptr`` to be used instead.
+
+.. code-block:: cpp
+
+    py::class_<Example, std::shared_ptr<Example> /* <- holder type */> obj(m, "Example");
+
+Note that any particular class can only be associated with a single holder type.
+
+One potential stumbling block when using holder types is that they need to be
+applied consistently. Can you guess what's broken about the following binding
+code?
+
+.. code-block:: cpp
+
+    class Child { };
+
+    class Parent {
+    public:
+        Parent() : child(std::make_shared<Child>()) { }
+        Child *get_child() { return child.get(); }  /* Hint: ** DON'T DO THIS ** */
+    private:
+        std::shared_ptr<Child> child;
+    };
+
+    PYBIND11_MODULE(example, m) {
+        py::class_<Child, std::shared_ptr<Child>>(m, "Child");
+
+        py::class_<Parent, std::shared_ptr<Parent>>(m, "Parent")
+            .def(py::init<>())
+            .def("get_child", &Parent::get_child);
+    }
+
+The following Python code will cause undefined behavior (and likely a
+segmentation fault).
+
+.. code-block:: python
+
+    from example import Parent
+
+    print(Parent().get_child())
+
+The problem is that ``Parent::get_child()`` returns a pointer to an instance of
+``Child``, but the fact that this instance is already managed by
+``std::shared_ptr<...>`` is lost when passing raw pointers. In this case,
+pybind11 will create a second independent ``std::shared_ptr<...>`` that also
+claims ownership of the pointer. In the end, the object will be freed **twice**
+since these shared pointers have no way of knowing about each other.
+
+There are two ways to resolve this issue (a corrected version of the binding
+code is sketched after this list):
+
+1. For types that are managed by a smart pointer class, never use raw pointers
+   in function arguments or return values. In other words: always consistently
+   wrap pointers into their designated holder types (such as
+   ``std::shared_ptr<...>``). In this case, the signature of ``get_child()``
+   should be modified as follows:
+
+.. code-block:: cpp
+
+    std::shared_ptr<Child> get_child() { return child; }
+
+2. Adjust the definition of ``Child`` by specifying
+   ``std::enable_shared_from_this<T>`` (see cppreference_ for details) as a
+   base class. This adds a small bit of information to ``Child`` that allows
+   pybind11 to realize that there is already an existing
+   ``std::shared_ptr<...>`` and communicate with it. In this case, the
+   declaration of ``Child`` should look as follows:
+
+.. _cppreference: http://en.cppreference.com/w/cpp/memory/enable_shared_from_this
+
+.. code-block:: cpp
+
+    class Child : public std::enable_shared_from_this<Child> { };
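+Putting the first resolution together with the holder declarations, a corrected
+version of the binding code might look like this (a sketch that simply combines
+the snippets above):
+
+.. code-block:: cpp
+
+    class Parent {
+    public:
+        Parent() : child(std::make_shared<Child>()) { }
+        std::shared_ptr<Child> get_child() { return child; }  // holder type, not a raw pointer
+    private:
+        std::shared_ptr<Child> child;
+    };
+
+    PYBIND11_MODULE(example, m) {
+        py::class_<Child, std::shared_ptr<Child>>(m, "Child");
+
+        py::class_<Parent, std::shared_ptr<Parent>>(m, "Parent")
+            .def(py::init<>())
+            .def("get_child", &Parent::get_child);  // now safe: returns shared_ptr<Child>
+    }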
+.. _smart_pointers:
+
+Custom smart pointers
+=====================
+
+pybind11 supports ``std::unique_ptr`` and ``std::shared_ptr`` right out of the
+box. For any other custom smart pointer, transparent conversions can be enabled
+using a macro invocation similar to the following. It must be declared at the
+top namespace level before any binding code:
+
+.. code-block:: cpp
+
+    PYBIND11_DECLARE_HOLDER_TYPE(T, SmartPtr<T>);
+
+The first argument of :func:`PYBIND11_DECLARE_HOLDER_TYPE` should be a
+placeholder name that is used as a template parameter of the second argument.
+Thus, feel free to use any identifier, but use it consistently on both sides;
+also, don't use the name of a type that already exists in your codebase.
+
+The macro also accepts a third optional boolean parameter that is set to false
+by default. Specify
+
+.. code-block:: cpp
+
+    PYBIND11_DECLARE_HOLDER_TYPE(T, SmartPtr<T>, true);
+
+if ``SmartPtr<T>`` can always be initialized from a ``T*`` pointer without the
+risk of inconsistencies (such as multiple independent ``SmartPtr<T>`` instances
+believing that they are the sole owner of the ``T*`` pointer). A common
+situation where ``true`` should be passed is when the ``T`` instances use
+*intrusive* reference counting.
+
+Please take a look at the :ref:`macro_notes` before using this feature.
+
+By default, pybind11 assumes that your custom smart pointer has a standard
+interface, i.e. provides a ``.get()`` member function to access the underlying
+raw pointer. If this is not the case, pybind11's ``holder_helper`` must be
+specialized:
+
+.. code-block:: cpp
+
+    // Always needed for custom holder types
+    PYBIND11_DECLARE_HOLDER_TYPE(T, SmartPtr<T>);
+
+    // Only needed if the type's `.get()` goes by another name
+    namespace pybind11 { namespace detail {
+        template <typename T>
+        struct holder_helper<SmartPtr<T>> { // <-- specialization
+            static const T *get(const SmartPtr<T> &p) { return p.getPointer(); }
+        };
+    }}
+
+The above specialization informs pybind11 that the custom ``SmartPtr`` class
+provides ``.get()`` functionality via ``.getPointer()``.
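+As a minimal sketch, a hypothetical intrusively counted ``SmartPtr`` could be
+declared to pybind11 like this (``add_ref()`` and ``release()`` are assumed to
+be provided by ``T``; a real implementation would also need assignment
+operators):
+
+.. code-block:: cpp
+
+    // A toy intrusively counted smart pointer (hypothetical, for illustration)
+    template <typename T>
+    class SmartPtr {
+    public:
+        explicit SmartPtr(T *p = nullptr) : ptr(p) { if (ptr) ptr->add_ref(); }
+        SmartPtr(const SmartPtr &other) : ptr(other.ptr) { if (ptr) ptr->add_ref(); }
+        ~SmartPtr() { if (ptr) ptr->release(); }
+        T *get() const { return ptr; }  // standard interface: no holder_helper needed
+    private:
+        T *ptr;
+    };
+
+    // The reference count lives inside T itself, so passing `true` is safe here
+    PYBIND11_DECLARE_HOLDER_TYPE(T, SmartPtr<T>, true);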
+.. seealso::
+
+    The file :file:`tests/test_smart_ptr.cpp` contains a complete example
+    that demonstrates how to work with custom reference-counting holder types
+    in more detail.
diff --git a/third_party/pybind11/docs/basics.rst b/third_party/pybind11/docs/basics.rst
new file mode 100644
index 0000000000..e9b24c7fa7
--- /dev/null
+++ b/third_party/pybind11/docs/basics.rst
@@ -0,0 +1,307 @@
+.. _basics:
+
+First steps
+###########
+
+This section demonstrates the basic features of pybind11. Before getting
+started, make sure that your development environment is set up to compile the
+included set of test cases.
+
+
+Compiling the test cases
+========================
+
+Linux/macOS
+-----------
+
+On Linux you'll need to install the **python-dev** or **python3-dev** packages as
+well as **cmake**. On macOS, the included Python version works out of the box,
+but **cmake** must still be installed.
+
+After installing the prerequisites, run
+
+.. code-block:: bash
+
+    mkdir build
+    cd build
+    cmake ..
+    make check -j 4
+
+The last line will both compile and run the tests.
+
+Windows
+-------
+
+On Windows, only **Visual Studio 2017** and newer are supported.
+
+.. Note::
+
+    To use C++17 in Visual Studio 2017 (MSVC 14.1), pybind11 requires the flag
+    ``/permissive-`` to be passed to the compiler `to enforce standard conformance`_. When
+    building with Visual Studio 2019, this is not strictly necessary, but still advised.
+
+.. _`to enforce standard conformance`: https://docs.microsoft.com/en-us/cpp/build/reference/permissive-standards-conformance?view=vs-2017
+
+To compile and run the tests:
+
+.. code-block:: batch
+
+    mkdir build
+    cd build
+    cmake ..
+    cmake --build . --config Release --target check
+
+This will create a Visual Studio project, compile and run the target, all from the
+command line.
+
+.. Note::
+
+    If all tests fail, make sure that the Python binary and the test cases are compiled
+    for the same processor type and bitness (i.e. either **i386** or **x86_64**). You
+    can specify **x86_64** as the target architecture for the generated Visual Studio
+    project using ``cmake -A x64 ..``.
+
+.. seealso::
+
+    Advanced users who are already familiar with Boost.Python may want to skip
+    the tutorial and look at the test cases in the :file:`tests` directory,
+    which exercise all features of pybind11.
+
+Header and namespace conventions
+================================
+
+For brevity, all code examples assume that the following two lines are present:
+
+.. code-block:: cpp
+
+    #include <pybind11/pybind11.h>
+
+    namespace py = pybind11;
+
+Some features may require additional headers, but those will be specified as needed.
+
+.. _simple_example:
+
+Creating bindings for a simple function
+=======================================
+
+Let's start by creating Python bindings for an extremely simple function, which
+adds two numbers and returns their result:
+
+.. code-block:: cpp
+
+    int add(int i, int j) {
+        return i + j;
+    }
+
+For simplicity [#f1]_, we'll put both this function and the binding code into
+a file named :file:`example.cpp` with the following contents:
+
+.. code-block:: cpp
+
+    #include <pybind11/pybind11.h>
+
+    int add(int i, int j) {
+        return i + j;
+    }
+
+    PYBIND11_MODULE(example, m) {
+        m.doc() = "pybind11 example plugin"; // optional module docstring
+
+        m.def("add", &add, "A function that adds two numbers");
+    }
+
+.. [#f1] In practice, implementation and binding code will generally be located
+   in separate files.
+
+The :func:`PYBIND11_MODULE` macro creates a function that will be called when an
+``import`` statement is issued from within Python. The module name (``example``)
+is given as the first macro argument (it should not be in quotes). The second
+argument (``m``) defines a variable of type :class:`py::module_ <module>` which
+is the main interface for creating bindings. The method :func:`module_::def`
+generates binding code that exposes the ``add()`` function to Python.
+
+.. note::
+
+    Notice how little code was needed to expose our function to Python: all
+    details regarding the function's parameters and return value were
+    automatically inferred using template metaprogramming. This overall
+    approach and the used syntax are borrowed from Boost.Python, though the
+    underlying implementation is very different.
+pybind11 is a header-only library, hence it is not necessary to link against
+any special libraries and there are no intermediate (magic) translation steps.
+On Linux, the above example can be compiled using the following command:
+
+.. code-block:: bash
+
+    $ c++ -O3 -Wall -shared -std=c++11 -fPIC $(python3 -m pybind11 --includes) example.cpp -o example$(python3-config --extension-suffix)
+
+.. note::
+
+    If you used :ref:`include_as_a_submodule` to get the pybind11 source, then
+    use ``$(python3-config --includes) -Iextern/pybind11/include`` instead of
+    ``$(python3 -m pybind11 --includes)`` in the above compilation, as
+    explained in :ref:`building_manually`.
+
+For more details on the required compiler flags on Linux and macOS, see
+:ref:`building_manually`. For complete cross-platform compilation instructions,
+refer to the :ref:`compiling` page.
+
+The `python_example`_ and `cmake_example`_ repositories are also a good place
+to start. They are both complete project examples with cross-platform build
+systems. The only difference between the two is that `python_example`_ uses
+Python's ``setuptools`` to build the module, while `cmake_example`_ uses CMake
+(which may be preferable for existing C++ projects).
+
+.. _python_example: https://github.com/pybind/python_example
+.. _cmake_example: https://github.com/pybind/cmake_example
+
+Building the above C++ code will produce a binary module file that can be
+imported to Python. Assuming that the compiled module is located in the
+current directory, the following interactive Python session shows how to
+load and execute the example:
+
+.. code-block:: pycon
+
+    $ python
+    Python 3.9.10 (main, Jan 15 2022, 11:48:04)
+    [Clang 13.0.0 (clang-1300.0.29.3)] on darwin
+    Type "help", "copyright", "credits" or "license" for more information.
+    >>> import example
+    >>> example.add(1, 2)
+    3
+    >>>
+
+.. _keyword_args:
+
+Keyword arguments
+=================
+
+With a simple code modification, it is possible to inform Python about the
+names of the arguments ("i" and "j" in this case).
+
+.. code-block:: cpp
+
+    m.def("add", &add, "A function which adds two numbers",
+          py::arg("i"), py::arg("j"));
+
+:class:`arg` is one of several special tag classes which can be used to pass
+metadata into :func:`module_::def`. With this modified binding code, we can now
+call the function using keyword arguments, which is a more readable alternative
+particularly for functions taking many parameters:
+
+.. code-block:: pycon
+
+    >>> import example
+    >>> example.add(i=1, j=2)
+    3
+
+The keyword names also appear in the function signatures within the documentation.
+
+.. code-block:: pycon
+
+    >>> help(example)
+
+    ....
+
+    FUNCTIONS
+        add(...)
+            Signature : (i: int, j: int) -> int
+
+            A function which adds two numbers
+
+A shorter notation for named arguments is also available:
+
+.. code-block:: cpp
+
+    // regular notation
+    m.def("add1", &add, py::arg("i"), py::arg("j"));
+    // shorthand
+    using namespace pybind11::literals;
+    m.def("add2", &add, "i"_a, "j"_a);
+
+The :var:`_a` suffix forms a C++11 literal which is equivalent to :class:`arg`.
+Note that the literal operator must first be made visible with the directive
+``using namespace pybind11::literals``. This does not bring in anything else
+from the ``pybind11`` namespace except for literals.
+.. _default_args:
+
+Default arguments
+=================
+
+Suppose now that the function to be bound has default arguments, e.g.:
+
+.. code-block:: cpp
+
+    int add(int i = 1, int j = 2) {
+        return i + j;
+    }
+
+Unfortunately, pybind11 cannot automatically extract these parameters, since they
+are not part of the function's type information. However, they are simple to specify
+using an extension of :class:`arg`:
+
+.. code-block:: cpp
+
+    m.def("add", &add, "A function which adds two numbers",
+          py::arg("i") = 1, py::arg("j") = 2);
+
+The default values also appear within the documentation.
+
+.. code-block:: pycon
+
+    >>> help(example)
+
+    ....
+
+    FUNCTIONS
+        add(...)
+            Signature : (i: int = 1, j: int = 2) -> int
+
+            A function which adds two numbers
+
+The shorthand notation is also available for default arguments:
+
+.. code-block:: cpp
+
+    // regular notation
+    m.def("add1", &add, py::arg("i") = 1, py::arg("j") = 2);
+    // shorthand
+    m.def("add2", &add, "i"_a=1, "j"_a=2);
+
+Exporting variables
+===================
+
+To expose a value from C++, use the ``attr`` function to register it in a
+module as shown below. Built-in types and general objects (more on that later)
+are automatically converted when assigned as attributes, and can be explicitly
+converted using the function ``py::cast``.
+
+.. code-block:: cpp
+
+    PYBIND11_MODULE(example, m) {
+        m.attr("the_answer") = 42;
+        py::object world = py::cast("World");
+        m.attr("what") = world;
+    }
+
+These are then accessible from Python:
+
+.. code-block:: pycon
+
+    >>> import example
+    >>> example.the_answer
+    42
+    >>> example.what
+    'World'
+
+.. _supported_types:
+
+Supported data types
+====================
+
+A large number of data types are supported out of the box and can be used
+seamlessly as function arguments, return values or with ``py::cast`` in general.
+For a full overview, see the :doc:`advanced/cast/index` section.
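+For instance, once ``pybind11/stl.h`` is included, common STL containers
+convert automatically. The following is a minimal sketch; ``sum`` is a
+hypothetical binding, not part of pybind11:
+
+.. code-block:: cpp
+
+    #include <pybind11/stl.h>  // enables std::vector, std::map, ... conversions
+
+    // A Python list of ints arrives as a std::vector<int> automatically
+    m.def("sum", [](const std::vector<int> &v) {
+        int total = 0;
+        for (int x : v) total += x;
+        return total;
+    });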
diff --git a/third_party/pybind11/docs/benchmark.py b/third_party/pybind11/docs/benchmark.py
new file mode 100644
index 0000000000..711343075b
--- /dev/null
+++ b/third_party/pybind11/docs/benchmark.py
@@ -0,0 +1,86 @@
+import datetime as dt
+import os
+import random
+
+nfns = 4  # Functions per class
+nargs = 4  # Arguments per function
+
+
+def generate_dummy_code_pybind11(nclasses=10):
+    decl = ""
+    bindings = ""
+
+    for cl in range(nclasses):
+        decl += f"class cl{cl:03};\n"
+    decl += "\n"
+
+    for cl in range(nclasses):
+        decl += f"class cl{cl:03} {{\n"
+        decl += "public:\n"
+        bindings += f'    py::class_<cl{cl:03}>(m, "cl{cl:03}")\n'
+        for fn in range(nfns):
+            ret = random.randint(0, nclasses - 1)
+            params = [random.randint(0, nclasses - 1) for i in range(nargs)]
+            decl += f"    cl{ret:03} *fn_{fn:03}("
+            decl += ", ".join(f"cl{p:03} *" for p in params)
+            decl += ");\n"
+            bindings += f'        .def("fn_{fn:03}", &cl{cl:03}::fn_{fn:03})\n'
+        decl += "};\n\n"
+        bindings += "        ;\n"
+
+    result = "#include <pybind11/pybind11.h>\n\n"
+    result += "namespace py = pybind11;\n\n"
+    result += decl + "\n"
+    result += "PYBIND11_MODULE(example, m) {\n"
+    result += bindings
+    result += "}"
+    return result
+
+
+def generate_dummy_code_boost(nclasses=10):
+    decl = ""
+    bindings = ""
+
+    for cl in range(nclasses):
+        decl += f"class cl{cl:03};\n"
+    decl += "\n"
+
+    for cl in range(nclasses):
+        decl += f"class cl{cl:03} {{\n"
+        decl += "public:\n"
+        bindings += f'    py::class_<cl{cl:03}>("cl{cl:03}")\n'
+        for fn in range(nfns):
+            ret = random.randint(0, nclasses - 1)
+            params = [random.randint(0, nclasses - 1) for i in range(nargs)]
+            decl += f"    cl{ret:03} *fn_{fn:03}("
+            decl += ", ".join(f"cl{p:03} *" for p in params)
+            decl += ");\n"
+            bindings += f'        .def("fn_{fn:03}", &cl{cl:03}::fn_{fn:03}, py::return_value_policy<py::manage_new_object>())\n'
+        decl += "};\n\n"
+        bindings += "        ;\n"
+
+    result = "#include <boost/python.hpp>\n\n"
+    result += "namespace py = boost::python;\n\n"
+    result += decl + "\n"
+    result += "BOOST_PYTHON_MODULE(example) {\n"
+    result += bindings
+    result += "}"
+    return result
+
+
+for codegen in [generate_dummy_code_pybind11, generate_dummy_code_boost]:
+    print("{")
+    for i in range(0, 10):
+        nclasses = 2**i
+        with open("test.cpp", "w") as f:
+            f.write(codegen(nclasses))
+        n1 = dt.datetime.now()
+        os.system(
+            "g++ -Os -shared -rdynamic -undefined dynamic_lookup "
+            "-fvisibility=hidden -std=c++14 test.cpp -I include "
+            "-I /System/Library/Frameworks/Python.framework/Headers -o test.so"
+        )
+        n2 = dt.datetime.now()
+        elapsed = (n2 - n1).total_seconds()
+        size = os.stat("test.so").st_size
+        print("    {%i, %f, %i}," % (nclasses * nfns, elapsed, size))
+    print("}")
diff --git a/third_party/pybind11/docs/benchmark.rst b/third_party/pybind11/docs/benchmark.rst
new file mode 100644
index 0000000000..02c2ccde7d
--- /dev/null
+++ b/third_party/pybind11/docs/benchmark.rst
@@ -0,0 +1,95 @@
+Benchmark
+=========
+
+The following is the result of a synthetic benchmark comparing both compilation
+time and module size of pybind11 against Boost.Python. A detailed report about a
+Boost.Python to pybind11 conversion of a real project is available here: [#f1]_.
+
+.. [#f1] http://graylab.jhu.edu/RosettaCon2016/PyRosetta-4.pdf
+
+Setup
+-----
+
+A Python script (see the ``docs/benchmark.py`` file) was used to generate a set
+of files with dummy classes whose count increases for each successive benchmark
+(between 1 and 2048 classes in powers of two). Each class has four methods with
+a randomly generated signature with a return value and four arguments. (There
+was no particular reason for this setup other than the desire to generate many
+unique function signatures whose count could be controlled in a simple way.)
+
+Here is an example of the binding code for one class:
+
+.. code-block:: cpp
+
+    ...
+    class cl034 {
+    public:
+        cl279 *fn_000(cl084 *, cl057 *, cl065 *, cl042 *);
+        cl025 *fn_001(cl098 *, cl262 *, cl414 *, cl121 *);
+        cl085 *fn_002(cl445 *, cl297 *, cl145 *, cl421 *);
+        cl470 *fn_003(cl200 *, cl323 *, cl332 *, cl492 *);
+    };
+    ...
+
+    PYBIND11_MODULE(example, m) {
+        ...
+        py::class_<cl034>(m, "cl034")
+            .def("fn_000", &cl034::fn_000)
+            .def("fn_001", &cl034::fn_001)
+            .def("fn_002", &cl034::fn_002)
+            .def("fn_003", &cl034::fn_003)
+        ...
+    }
+
+The Boost.Python version looks almost identical except that a return value
+policy had to be specified as an argument to ``def()``. For both libraries,
+compilation was done with
+
+.. code-block:: bash
+
+    Apple LLVM version 7.0.2 (clang-700.1.81)
+
+and the following compilation flags
+
+.. code-block:: bash
+
+    g++ -Os -shared -rdynamic -undefined dynamic_lookup -fvisibility=hidden -std=c++14
+
+Compilation time
+----------------
+
+The following log-log plot shows how the compilation time grows for an
+increasing number of class and function declarations. pybind11 includes many
+fewer headers, which initially leads to shorter compilation times, but the
+performance is ultimately fairly similar (pybind11 is 19.8 seconds faster for
+the largest file with 2048 classes and a total of 8192 methods -- a
+modest **1.2x** speedup relative to Boost.Python, which required 116.35
+seconds).
+
+.. only:: not latex
+
+    .. image:: pybind11_vs_boost_python1.svg
+
+.. only:: latex
+
+    .. image:: pybind11_vs_boost_python1.png
+
+Module size
+-----------
+
+Differences between the two libraries become much more pronounced when
+considering the file size of the generated Python plugin: for the largest file,
+the binary generated by Boost.Python required 16.8 MiB, which was **2.17
+times** / **9.1 megabytes** larger than the output generated by pybind11. For
+very small inputs, Boost.Python has an edge in the plot below -- however, note
+that it stores many definitions in an external library, whose size was not
+included here, hence the comparison is slightly shifted in Boost.Python's
+favor.
+
+.. only:: not latex
+
+    .. image:: pybind11_vs_boost_python2.svg
+
+.. only:: latex
+
+    .. image:: pybind11_vs_boost_python2.png
diff --git a/third_party/pybind11/docs/changelog.rst b/third_party/pybind11/docs/changelog.rst
new file mode 100644
index 0000000000..67ac79cdd1
--- /dev/null
+++ b/third_party/pybind11/docs/changelog.rst
@@ -0,0 +1,2327 @@
+.. _changelog:
+
+Changelog
+#########
+
+Starting with version 1.8.0, pybind11 releases use a `semantic versioning
+<http://semver.org>`_ policy.
+
+Changes will be added here periodically from the "Suggested changelog entry"
+block in pull request descriptions.
+
+IN DEVELOPMENT
+--------------
+
+Removed support for Python 2.7, Python 3.5, and MSVC 2015. Support for MSVC
+2017 is limited due to availability of CI runners; we highly recommend MSVC
+2019 or 2022 be used.
+
+New features:
+
+* ``type_caster<std::monostate>`` was added. ``std::monostate`` is a tag type
+  that allows ``std::variant`` to act as an optional, or allows default
+  construction of a ``std::variant`` holding a non-default constructible type.
+  `#3818 <https://github.com/pybind/pybind11/pull/3818>`_
+
+* Support bytearray casting to string.
+  `#3707 <https://github.com/pybind/pybind11/pull/3707>`_
+
+Changes:
+
+* Python 2 support was removed completely.
+  `#3688 <https://github.com/pybind/pybind11/pull/3688>`_
+
+* The minimum version for MSVC is now 2017.
+ `#3722 `_ + +* Improve exception handling in python ``str`` bindings. + `#3826 `_ + +* The bindings for capsules now have more consistent exception handling. + `#3825 `_ + +* Fix exception handling when ``pybind11::weakref()`` fails. + `#3739 `_ + + +Bug fixes: + +* ``PYBIND11_OBJECT_CVT`` and ``PYBIND11_OBJECT_CVT_DEFAULT`` macro can be used + to define classes in namespaces other than pybind11. + `#3797 `_ + +Build system improvements: + +* Add MSVC builds in debug mode to CI. + `#3784 `_ + +* MSVC 2022 C++20 coverage was added to GitHub Actions, including Eigen. + `#3732 `_, + `#3741 `_ + +* Avoid ``setup.py `` usage in internal tests. + `#3734 `_ + + +Backend and tidying up: + +* Remove idioms in code comments. Use inclusive language. + `#3809 `_ + + +Version 2.9.2 (Mar 29, 2022) +---------------------------- + +Changes: + +* Enum now has an ``__index__`` method on Python <3.8 too. + `#3700 `_ + +* Local internals are now cleared after finalizing the interpreter. + `#3744 `_ + +Bug fixes: + +* Better support for Python 3.11 alphas. + `#3694 `_ + +* ``PYBIND11_TYPE_CASTER`` now uses fully qualified symbols, so it can be used + outside of ``pybind11::detail``. + `#3758 `_ + +* Some fixes for PyPy 3.9. + `#3768 `_ + +* Fixed a potential memleak in PyPy in ``get_type_override``. + `#3774 `_ + +* Fix usage of ``VISIBILITY_INLINES_HIDDEN``. + `#3721 `_ + + +Build system improvements: + +* Uses ``sysconfig`` module to determine installation locations on Python >= + 3.10, instead of ``distutils`` which has been deprecated. + `#3764 `_ + +* Support Catch 2.13.5+ (supporting GLIBC 2.34+). + `#3679 `_ + +* Fix test failures with numpy 1.22 by ignoring whitespace when comparing + ``str()`` of dtypes. + `#3682 `_ + + +Backend and tidying up: + +* clang-tidy: added ``readability-qualified-auto``, + ``readability-braces-around-statements``, + ``cppcoreguidelines-prefer-member-initializer``, + ``clang-analyzer-optin.performance.Padding``, + ``cppcoreguidelines-pro-type-static-cast-downcast``, and + ``readability-inconsistent-declaration-parameter-name``. + `#3702 `_, + `#3699 `_, + `#3716 `_, + `#3709 `_ + +* clang-format was added to the pre-commit actions, and the entire code base + automatically reformatted (after several iterations preparing for this leap). + `#3713 `_ + + +Version 2.9.1 (Feb 2, 2022) +--------------------------- + +Changes: + +* If possible, attach Python exception with ``py::raise_from`` to ``TypeError`` + when casting from C++ to Python. This will give additional info if Python + exceptions occur in the caster. Adds a test case of trying to convert a set + from C++ to Python when the hash function is not defined in Python. + `#3605 `_ + +* Add a mapping of C++11 nested exceptions to their Python exception + equivalent using ``py::raise_from``. This attaches the nested exceptions in + Python using the ``__cause__`` field. + `#3608 `_ + +* Propagate Python exception traceback using ``raise_from`` if a pybind11 + function runs out of overloads. + `#3671 `_ + +* ``py::multiple_inheritance`` is now only needed when C++ bases are hidden + from pybind11. + `#3650 `_ and + `#3659 `_ + + +Bug fixes: + +* Remove a boolean cast in ``numpy.h`` that causes MSVC C4800 warnings when + compiling against Python 3.10 or newer. + `#3669 `_ + +* Render ``py::bool_`` and ``py::float_`` as ``bool`` and ``float`` + respectively. + `#3622 `_ + +Build system improvements: + +* Fix CMake extension suffix computation on Python 3.10+. 
+ `#3663 `_ + +* Allow ``CMAKE_ARGS`` to override CMake args in pybind11's own ``setup.py``. + `#3577 `_ + +* Remove a few deprecated c-headers. + `#3610 `_ + +* More uniform handling of test targets. + `#3590 `_ + +* Add clang-tidy readability check to catch potentially swapped function args. + `#3611 `_ + + +Version 2.9.0 (Dec 28, 2021) +---------------------------- + +This is the last version to support Python 2.7 and 3.5. + +New Features: + +* Allow ``py::args`` to be followed by other arguments; the remaining arguments + are implicitly keyword-only, as if a ``py::kw_only{}`` annotation had been + used. + `#3402 `_ + +Changes: + +* Make str/bytes/memoryview more interoperable with ``std::string_view``. + `#3521 `_ + +* Replace ``_`` with ``const_name`` in internals, avoid defining ``pybind::_`` + if ``_`` defined as macro (common gettext usage) + `#3423 `_ + + +Bug fixes: + +* Fix a rare warning about extra copy in an Eigen constructor. + `#3486 `_ + +* Fix caching of the C++ overrides. + `#3465 `_ + +* Add missing ``std::forward`` calls to some ``cpp_function`` overloads. + `#3443 `_ + +* Support PyPy 7.3.7 and the PyPy3.8 beta. Test python-3.11 on PRs with the + ``python dev`` label. + `#3419 `_ + +* Replace usage of deprecated ``Eigen::MappedSparseMatrix`` with + ``Eigen::Map>`` for Eigen 3.3+. + `#3499 `_ + +* Tweaks to support Microsoft Visual Studio 2022. + `#3497 `_ + +Build system improvements: + +* Nicer CMake printout and IDE organisation for pybind11's own tests. + `#3479 `_ + +* CMake: report version type as part of the version string to avoid a spurious + space in the package status message. + `#3472 `_ + +* Flags starting with ``-g`` in ``$CFLAGS`` and ``$CPPFLAGS`` are no longer + overridden by ``.Pybind11Extension``. + `#3436 `_ + +* Ensure ThreadPool is closed in ``setup_helpers``. + `#3548 `_ + +* Avoid LTS on ``mips64`` and ``ppc64le`` (reported broken). + `#3557 `_ + + +v2.8.1 (Oct 27, 2021) +--------------------- + +Changes and additions: + +* The simple namespace creation shortcut added in 2.8.0 was deprecated due to + usage of CPython internal API, and will be removed soon. Use + ``py::module_::import("types").attr("SimpleNamespace")``. + `#3374 `_ + +* Add C++ Exception type to throw and catch ``AttributeError``. Useful for + defining custom ``__setattr__`` and ``__getattr__`` methods. + `#3387 `_ + +Fixes: + +* Fixed the potential for dangling references when using properties with + ``std::optional`` types. + `#3376 `_ + +* Modernize usage of ``PyCodeObject`` on Python 3.9+ (moving toward support for + Python 3.11a1) + `#3368 `_ + +* A long-standing bug in ``eigen.h`` was fixed (originally PR #3343). The bug + was unmasked by newly added ``static_assert``'s in the Eigen 3.4.0 release. + `#3352 `_ + +* Support multiple raw inclusion of CMake helper files (Conan.io does this for + multi-config generators). + `#3420 `_ + +* Fix harmless warning on upcoming CMake 3.22. + `#3368 `_ + +* Fix 2.8.0 regression with MSVC 2017 + C++17 mode + Python 3. + `#3407 `_ + +* Fix 2.8.0 regression that caused undefined behavior (typically + segfaults) in ``make_key_iterator``/``make_value_iterator`` if dereferencing + the iterator returned a temporary value instead of a reference. + `#3348 `_ + + +v2.8.0 (Oct 4, 2021) +-------------------- + +New features: + +* Added ``py::raise_from`` to enable chaining exceptions. + `#3215 `_ + +* Allow exception translators to be optionally registered local to a module + instead of applying globally across all pybind11 modules. 
Use + ``register_local_exception_translator(ExceptionTranslator&& translator)`` + instead of ``register_exception_translator(ExceptionTranslator&& + translator)`` to keep your exception remapping code local to the module. + `#2650 `_ + +* Add ``make_simple_namespace`` function for instantiating Python + ``SimpleNamespace`` objects. **Deprecated in 2.8.1.** + `#2840 `_ + +* ``pybind11::scoped_interpreter`` and ``initialize_interpreter`` have new + arguments to allow ``sys.argv`` initialization. + `#2341 `_ + +* Allow Python builtins to be used as callbacks in CPython. + `#1413 `_ + +* Added ``view`` to view arrays with a different datatype. + `#987 `_ + +* Implemented ``reshape`` on arrays. + `#984 `_ + +* Enable defining custom ``__new__`` methods on classes by fixing bug + preventing overriding methods if they have non-pybind11 siblings. + `#3265 `_ + +* Add ``make_value_iterator()``, and fix ``make_key_iterator()`` to return + references instead of copies. + `#3293 `_ + +* Improve the classes generated by ``bind_map``: `#3310 `_ + + * Change ``.items`` from an iterator to a dictionary view. + * Add ``.keys`` and ``.values`` (both dictionary views). + * Allow ``__contains__`` to take any object. + +* ``pybind11::custom_type_setup`` was added, for customizing the + ``PyHeapTypeObject`` corresponding to a class, which may be useful for + enabling garbage collection support, among other things. + `#3287 `_ + + +Changes: + +* Set ``__file__`` constant when running ``eval_file`` in an embedded interpreter. + `#3233 `_ + +* Python objects and (C++17) ``std::optional`` now accepted in ``py::slice`` + constructor. + `#1101 `_ + +* The pybind11 proxy types ``str``, ``bytes``, ``bytearray``, ``tuple``, + ``list`` now consistently support passing ``ssize_t`` values for sizes and + indexes. Previously, only ``size_t`` was accepted in several interfaces. + `#3219 `_ + +* Avoid evaluating ``PYBIND11_TLS_REPLACE_VALUE`` arguments more than once. + `#3290 `_ + +Fixes: + +* Bug fix: enum value's ``__int__`` returning non-int when underlying type is + bool or of char type. + `#1334 `_ + +* Fixes bug in setting error state in Capsule's pointer methods. + `#3261 `_ + +* A long-standing memory leak in ``py::cpp_function::initialize`` was fixed. + `#3229 `_ + +* Fixes thread safety for some ``pybind11::type_caster`` which require lifetime + extension, such as for ``std::string_view``. + `#3237 `_ + +* Restore compatibility with gcc 4.8.4 as distributed by ubuntu-trusty, linuxmint-17. + `#3270 `_ + + +Build system improvements: + +* Fix regression in CMake Python package config: improper use of absolute path. + `#3144 `_ + +* Cached Python version information could become stale when CMake was re-run + with a different Python version. The build system now detects this and + updates this information. + `#3299 `_ + +* Specified UTF8-encoding in setup.py calls of open(). + `#3137 `_ + +* Fix a harmless warning from CMake 3.21 with the classic Python discovery. + `#3220 `_ + +* Eigen repo and version can now be specified as cmake options. + `#3324 `_ + + +Backend and tidying up: + +* Reduced thread-local storage required for keeping alive temporary data for + type conversion to one key per ABI version, rather than one key per extension + module. This makes the total thread-local storage required by pybind11 2 + keys per ABI version. + `#3275 `_ + +* Optimize NumPy array construction with additional moves. 
+ `#3183 `_ + +* Conversion to ``std::string`` and ``std::string_view`` now avoids making an + extra copy of the data on Python >= 3.3. + `#3257 `_ + +* Remove const modifier from certain C++ methods on Python collections + (``list``, ``set``, ``dict``) such as (``clear()``, ``append()``, + ``insert()``, etc...) and annotated them with ``py-non-const``. + +* Enable readability ``clang-tidy-const-return`` and remove useless consts. + `#3254 `_ + `#3194 `_ + +* The clang-tidy ``google-explicit-constructor`` option was enabled. + `#3250 `_ + +* Mark a pytype move constructor as noexcept (perf). + `#3236 `_ + +* Enable clang-tidy check to guard against inheritance slicing. + `#3210 `_ + +* Legacy warning suppression pragma were removed from eigen.h. On Unix + platforms, please use -isystem for Eigen include directories, to suppress + compiler warnings originating from Eigen headers. Note that CMake does this + by default. No adjustments are needed for Windows. + `#3198 `_ + +* Format pybind11 with isort consistent ordering of imports + `#3195 `_ + +* The warnings-suppression "pragma clamp" at the top/bottom of pybind11 was + removed, clearing the path to refactoring and IWYU cleanup. + `#3186 `_ + +* Enable most bugprone checks in clang-tidy and fix the found potential bugs + and poor coding styles. + `#3166 `_ + +* Add ``clang-tidy-readability`` rules to make boolean casts explicit improving + code readability. Also enabled other misc and readability clang-tidy checks. + `#3148 `_ + +* Move object in ``.pop()`` for list. + `#3116 `_ + + + + +v2.7.1 (Aug 3, 2021) +--------------------- + +Minor missing functionality added: + +* Allow Python builtins to be used as callbacks in CPython. + `#1413 `_ + +Bug fixes: + +* Fix regression in CMake Python package config: improper use of absolute path. + `#3144 `_ + +* Fix Mingw64 and add to the CI testing matrix. + `#3132 `_ + +* Specified UTF8-encoding in setup.py calls of open(). + `#3137 `_ + +* Add clang-tidy-readability rules to make boolean casts explicit improving + code readability. Also enabled other misc and readability clang-tidy checks. + `#3148 `_ + +* Move object in ``.pop()`` for list. + `#3116 `_ + +Backend and tidying up: + +* Removed and fixed warning suppressions. + `#3127 `_ + `#3129 `_ + `#3135 `_ + `#3141 `_ + `#3142 `_ + `#3150 `_ + `#3152 `_ + `#3160 `_ + `#3161 `_ + + +v2.7.0 (Jul 16, 2021) +--------------------- + +New features: + +* Enable ``py::implicitly_convertible`` for + ``py::class_``-wrapped types. + `#3059 `_ + +* Allow function pointer extraction from overloaded functions. + `#2944 `_ + +* NumPy: added ``.char_()`` to type which gives the NumPy public ``char`` + result, which also distinguishes types by bit length (unlike ``.kind()``). + `#2864 `_ + +* Add ``pybind11::bytearray`` to manipulate ``bytearray`` similar to ``bytes``. + `#2799 `_ + +* ``pybind11/stl/filesystem.h`` registers a type caster that, on C++17/Python + 3.6+, converts ``std::filesystem::path`` to ``pathlib.Path`` and any + ``os.PathLike`` to ``std::filesystem::path``. + `#2730 `_ + +* A ``PYBIND11_VERSION_HEX`` define was added, similar to ``PY_VERSION_HEX``. + `#3120 `_ + + + +Changes: + +* ``py::str`` changed to exclusively hold ``PyUnicodeObject``. Previously + ``py::str`` could also hold ``bytes``, which is probably surprising, was + never documented, and can mask bugs (e.g. accidental use of ``py::str`` + instead of ``py::bytes``). 
+ `#2409 `_ + +* Add a safety guard to ensure that the Python GIL is held when C++ calls back + into Python via ``object_api<>::operator()`` (e.g. ``py::function`` + ``__call__``). (This feature is available for Python 3.6+ only.) + `#2919 `_ + +* Catch a missing ``self`` argument in calls to ``__init__()``. + `#2914 `_ + +* Use ``std::string_view`` if available to avoid a copy when passing an object + to a ``std::ostream``. + `#3042 `_ + +* An important warning about thread safety was added to the ``iostream.h`` + documentation; attempts to make ``py::scoped_ostream_redirect`` thread safe + have been removed, as it was only partially effective. + `#2995 `_ + + +Fixes: + +* Performance: avoid unnecessary strlen calls. + `#3058 `_ + +* Fix auto-generated documentation string when using ``const T`` in + ``pyarray_t``. + `#3020 `_ + +* Unify error messages thrown by ``simple_collector``/``unpacking_collector``. + `#3013 `_ + +* ``pybind11::builtin_exception`` is now explicitly exported, which means the + types included/defined in different modules are identical, and exceptions + raised in different modules can be caught correctly. The documentation was + updated to explain that custom exceptions that are used across module + boundaries need to be explicitly exported as well. + `#2999 `_ + +* Fixed exception when printing UTF-8 to a ``scoped_ostream_redirect``. + `#2982 `_ + +* Pickle support enhancement: ``setstate`` implementation will attempt to + ``setattr`` ``__dict__`` only if the unpickled ``dict`` object is not empty, + to not force use of ``py::dynamic_attr()`` unnecessarily. + `#2972 `_ + +* Allow negative timedelta values to roundtrip. + `#2870 `_ + +* Fix unchecked errors could potentially swallow signals/other exceptions. + `#2863 `_ + +* Add null pointer check with ``std::localtime``. + `#2846 `_ + +* Fix the ``weakref`` constructor from ``py::object`` to create a new + ``weakref`` on conversion. + `#2832 `_ + +* Avoid relying on exceptions in C++17 when getting a ``shared_ptr`` holder + from a ``shared_from_this`` class. + `#2819 `_ + +* Allow the codec's exception to be raised instead of :code:`RuntimeError` when + casting from :code:`py::str` to :code:`std::string`. + `#2903 `_ + + +Build system improvements: + +* In ``setup_helpers.py``, test for platforms that have some multiprocessing + features but lack semaphores, which ``ParallelCompile`` requires. + `#3043 `_ + +* Fix ``pybind11_INCLUDE_DIR`` in case ``CMAKE_INSTALL_INCLUDEDIR`` is + absolute. + `#3005 `_ + +* Fix bug not respecting ``WITH_SOABI`` or ``WITHOUT_SOABI`` to CMake. + `#2938 `_ + +* Fix the default ``Pybind11Extension`` compilation flags with a Mingw64 python. + `#2921 `_ + +* Clang on Windows: do not pass ``/MP`` (ignored flag). + `#2824 `_ + +* ``pybind11.setup_helpers.intree_extensions`` can be used to generate + ``Pybind11Extension`` instances from cpp files placed in the Python package + source tree. + `#2831 `_ + +Backend and tidying up: + +* Enable clang-tidy performance, readability, and modernization checks + throughout the codebase to enforce best coding practices. + `#3046 `_, + `#3049 `_, + `#3051 `_, + `#3052 `_, + `#3080 `_, and + `#3094 `_ + + +* Checks for common misspellings were added to the pre-commit hooks. + `#3076 `_ + +* Changed ``Werror`` to stricter ``Werror-all`` for Intel compiler and fixed + minor issues. + `#2948 `_ + +* Fixed compilation with GCC < 5 when the user defines ``_GLIBCXX_USE_CXX11_ABI``. 
+ `#2956 `_ + +* Added nox support for easier local testing and linting of contributions. + `#3101 `_ and + `#3121 `_ + +* Avoid RTD style issue with docutils 0.17+. + `#3119 `_ + +* Support pipx run, such as ``pipx run pybind11 --include`` for a quick compile. + `#3117 `_ + + + +v2.6.2 (Jan 26, 2021) +--------------------- + +Minor missing functionality added: + +* enum: add missing Enum.value property. + `#2739 `_ + +* Allow thread termination to be avoided during shutdown for CPython 3.7+ via + ``.disarm`` for ``gil_scoped_acquire``/``gil_scoped_release``. + `#2657 `_ + +Fixed or improved behavior in a few special cases: + +* Fix bug where the constructor of ``object`` subclasses would not throw on + being passed a Python object of the wrong type. + `#2701 `_ + +* The ``type_caster`` for integers does not convert Python objects with + ``__int__`` anymore with ``noconvert`` or during the first round of trying + overloads. + `#2698 `_ + +* When casting to a C++ integer, ``__index__`` is always called and not + considered as conversion, consistent with Python 3.8+. + `#2801 `_ + +Build improvements: + +* Setup helpers: ``extra_compile_args`` and ``extra_link_args`` automatically set by + Pybind11Extension are now prepended, which allows them to be overridden + by user-set ``extra_compile_args`` and ``extra_link_args``. + `#2808 `_ + +* Setup helpers: Don't trigger unused parameter warning. + `#2735 `_ + +* CMake: Support running with ``--warn-uninitialized`` active. + `#2806 `_ + +* CMake: Avoid error if included from two submodule directories. + `#2804 `_ + +* CMake: Fix ``STATIC`` / ``SHARED`` being ignored in FindPython mode. + `#2796 `_ + +* CMake: Respect the setting for ``CMAKE_CXX_VISIBILITY_PRESET`` if defined. + `#2793 `_ + +* CMake: Fix issue with FindPython2/FindPython3 not working with ``pybind11::embed``. + `#2662 `_ + +* CMake: mixing local and installed pybind11's would prioritize the installed + one over the local one (regression in 2.6.0). + `#2716 `_ + + +Bug fixes: + +* Fixed segfault in multithreaded environments when using + ``scoped_ostream_redirect``. + `#2675 `_ + +* Leave docstring unset when all docstring-related options are disabled, rather + than set an empty string. + `#2745 `_ + +* The module key in builtins that pybind11 uses to store its internals changed + from std::string to a python str type (more natural on Python 2, no change on + Python 3). + `#2814 `_ + +* Fixed assertion error related to unhandled (later overwritten) exception in + CPython 3.8 and 3.9 debug builds. + `#2685 `_ + +* Fix ``py::gil_scoped_acquire`` assert with CPython 3.9 debug build. + `#2683 `_ + +* Fix issue with a test failing on pytest 6.2. + `#2741 `_ + +Warning fixes: + +* Fix warning modifying constructor parameter 'flag' that shadows a field of + 'set_flag' ``[-Wshadow-field-in-constructor-modified]``. + `#2780 `_ + +* Suppressed some deprecation warnings about old-style + ``__init__``/``__setstate__`` in the tests. + `#2759 `_ + +Valgrind work: + +* Fix invalid access when calling a pybind11 ``__init__`` on a non-pybind11 + class instance. + `#2755 `_ + +* Fixed various minor memory leaks in pybind11's test suite. + `#2758 `_ + +* Resolved memory leak in cpp_function initialization when exceptions occurred. + `#2756 `_ + +* Added a Valgrind build, checking for leaks and memory-related UB, to CI. + `#2746 `_ + +Compiler support: + +* Intel compiler was not activating C++14 support due to a broken define. + `#2679 `_ + +* Support ICC and NVIDIA HPC SDK in C++17 mode. 
+ `#2729 `_ + +* Support Intel OneAPI compiler (ICC 20.2) and add to CI. + `#2573 `_ + + + +v2.6.1 (Nov 11, 2020) +--------------------- + +* ``py::exec``, ``py::eval``, and ``py::eval_file`` now add the builtins module + as ``"__builtins__"`` to their ``globals`` argument, better matching ``exec`` + and ``eval`` in pure Python. + `#2616 `_ + +* ``setup_helpers`` will no longer set a minimum macOS version higher than the + current version. + `#2622 `_ + +* Allow deleting static properties. + `#2629 `_ + +* Seal a leak in ``def_buffer``, cleaning up the ``capture`` object after the + ``class_`` object goes out of scope. + `#2634 `_ + +* ``pybind11_INCLUDE_DIRS`` was incorrect, potentially causing a regression if + it was expected to include ``PYTHON_INCLUDE_DIRS`` (please use targets + instead). + `#2636 `_ + +* Added parameter names to the ``py::enum_`` constructor and methods, avoiding + ``arg0`` in the generated docstrings. + `#2637 `_ + +* Added ``needs_recompile`` optional function to the ``ParallelCompiler`` + helper, to allow a recompile to be skipped based on a user-defined function. + `#2643 `_ + + +v2.6.0 (Oct 21, 2020) +--------------------- + +See :ref:`upgrade-guide-2.6` for help upgrading to the new version. + +New features: + +* Keyword-only arguments supported in Python 2 or 3 with ``py::kw_only()``. + `#2100 `_ + +* Positional-only arguments supported in Python 2 or 3 with ``py::pos_only()``. + `#2459 `_ + +* ``py::is_final()`` class modifier to block subclassing (CPython only). + `#2151 `_ + +* Added ``py::prepend()``, allowing a function to be placed at the beginning of + the overload chain. + `#1131 `_ + +* Access to the type object now provided with ``py::type::of()`` and + ``py::type::of(h)``. + `#2364 `_ + +* Perfect forwarding support for methods. + `#2048 `_ + +* Added ``py::error_already_set::discard_as_unraisable()``. + `#2372 `_ + +* ``py::hash`` is now public. + `#2217 `_ + +* ``py::class_`` is now supported. Note that writing to one data + member of the union and reading another (type punning) is UB in C++. Thus + pybind11-bound enums should never be used for such conversions. + `#2320 `_. + +* Classes now check local scope when registering members, allowing a subclass + to have a member with the same name as a parent (such as an enum). + `#2335 `_ + +Code correctness features: + +* Error now thrown when ``__init__`` is forgotten on subclasses. + `#2152 `_ + +* Throw error if conversion to a pybind11 type if the Python object isn't a + valid instance of that type, such as ``py::bytes(o)`` when ``py::object o`` + isn't a bytes instance. + `#2349 `_ + +* Throw if conversion to ``str`` fails. + `#2477 `_ + + +API changes: + +* ``py::module`` was renamed ``py::module_`` to avoid issues with C++20 when + used unqualified, but an alias ``py::module`` is provided for backward + compatibility. + `#2489 `_ + +* Public constructors for ``py::module_`` have been deprecated; please use + ``pybind11::module_::create_extension_module`` if you were using the public + constructor (fairly rare after ``PYBIND11_MODULE`` was introduced). + `#2552 `_ + +* ``PYBIND11_OVERLOAD*`` macros and ``get_overload`` function replaced by + correctly-named ``PYBIND11_OVERRIDE*`` and ``get_override``, fixing + inconsistencies in the presence of a closing ``;`` in these macros. + ``get_type_overload`` is deprecated. + `#2325 `_ + +Packaging / building improvements: + +* The Python package was reworked to be more powerful and useful. 
+ `#2433 `_ + + * :ref:`build-setuptools` is easier thanks to a new + ``pybind11.setup_helpers`` module, which provides utilities to use + setuptools with pybind11. It can be used via PEP 518, ``setup_requires``, + or by directly importing or copying ``setup_helpers.py`` into your project. + + * CMake configuration files are now included in the Python package. Use + ``pybind11.get_cmake_dir()`` or ``python -m pybind11 --cmakedir`` to get + the directory with the CMake configuration files, or include the + site-packages location in your ``CMAKE_MODULE_PATH``. Or you can use the + new ``pybind11[global]`` extra when you install ``pybind11``, which + installs the CMake files and headers into your base environment in the + standard location. + + * ``pybind11-config`` is another way to write ``python -m pybind11`` if you + have your PATH set up. + + * Added external typing support to the helper module, code from + ``import pybind11`` can now be type checked. + `#2588 `_ + +* Minimum CMake required increased to 3.4. + `#2338 `_ and + `#2370 `_ + + * Full integration with CMake's C++ standard system and compile features + replaces ``PYBIND11_CPP_STANDARD``. + + * Generated config file is now portable to different Python/compiler/CMake + versions. + + * Virtual environments prioritized if ``PYTHON_EXECUTABLE`` is not set + (``venv``, ``virtualenv``, and ``conda``) (similar to the new FindPython + mode). + + * Other CMake features now natively supported, like + ``CMAKE_INTERPROCEDURAL_OPTIMIZATION``, ``set(CMAKE_CXX_VISIBILITY_PRESET + hidden)``. + + * ``CUDA`` as a language is now supported. + + * Helper functions ``pybind11_strip``, ``pybind11_extension``, + ``pybind11_find_import`` added, see :doc:`cmake/index`. + + * Optional :ref:`find-python-mode` and :ref:`nopython-mode` with CMake. + `#2370 `_ + +* Uninstall target added. + `#2265 `_ and + `#2346 `_ + +* ``pybind11_add_module()`` now accepts an optional ``OPT_SIZE`` flag that + switches the binding target to size-based optimization if the global build + type can not always be fixed to ``MinSizeRel`` (except in debug mode, where + optimizations remain disabled). ``MinSizeRel`` or this flag reduces binary + size quite substantially (~25% on some platforms). + `#2463 `_ + +Smaller or developer focused features and fixes: + +* Moved ``mkdoc.py`` to a new repo, `pybind11-mkdoc`_. There are no longer + submodules in the main repo. + +* ``py::memoryview`` segfault fix and update, with new + ``py::memoryview::from_memory`` in Python 3, and documentation. + `#2223 `_ + +* Fix for ``buffer_info`` on Python 2. + `#2503 `_ + +* If ``__eq__`` defined but not ``__hash__``, ``__hash__`` is now set to + ``None``. + `#2291 `_ + +* ``py::ellipsis`` now also works on Python 2. + `#2360 `_ + +* Pointer to ``std::tuple`` & ``std::pair`` supported in cast. + `#2334 `_ + +* Small fixes in NumPy support. ``py::array`` now uses ``py::ssize_t`` as first + argument type. + `#2293 `_ + +* Added missing signature for ``py::array``. + `#2363 `_ + +* ``unchecked_mutable_reference`` has access to operator ``()`` and ``[]`` when + const. + `#2514 `_ + +* ``py::vectorize`` is now supported on functions that return void. + `#1969 `_ + +* ``py::capsule`` supports ``get_pointer`` and ``set_pointer``. + `#1131 `_ + +* Fix crash when different instances share the same pointer of the same type. + `#2252 `_ + +* Fix for ``py::len`` not clearing Python's error state when it fails and throws. + `#2575 `_ + +* Bugfixes related to more extensive testing, new GitHub Actions CI. 
+ `#2321 `_ + +* Bug in timezone issue in Eastern hemisphere midnight fixed. + `#2438 `_ + +* ``std::chrono::time_point`` now works when the resolution is not the same as + the system. + `#2481 `_ + +* Bug fixed where ``py::array_t`` could accept arrays that did not match the + requested ordering. + `#2484 `_ + +* Avoid a segfault on some compilers when types are removed in Python. + `#2564 `_ + +* ``py::arg::none()`` is now also respected when passing keyword arguments. + `#2611 `_ + +* PyPy fixes, PyPy 7.3.x now supported, including PyPy3. (Known issue with + PyPy2 and Windows `#2596 `_). + `#2146 `_ + +* CPython 3.9.0 workaround for undefined behavior (macOS segfault). + `#2576 `_ + +* CPython 3.9 warning fixes. + `#2253 `_ + +* Improved C++20 support, now tested in CI. + `#2489 `_ + `#2599 `_ + +* Improved but still incomplete debug Python interpreter support. + `#2025 `_ + +* NVCC (CUDA 11) now supported and tested in CI. + `#2461 `_ + +* NVIDIA PGI compilers now supported and tested in CI. + `#2475 `_ + +* At least Intel 18 now explicitly required when compiling with Intel. + `#2577 `_ + +* Extensive style checking in CI, with `pre-commit`_ support. Code + modernization, checked by clang-tidy. + +* Expanded docs, including new main page, new installing section, and CMake + helpers page, along with over a dozen new sections on existing pages. + +* In GitHub, new docs for contributing and new issue templates. + +.. _pre-commit: https://pre-commit.com + +.. _pybind11-mkdoc: https://github.com/pybind/pybind11-mkdoc + +v2.5.0 (Mar 31, 2020) +----------------------------------------------------- + +* Use C++17 fold expressions in type casters, if available. This can + improve performance during overload resolution when functions have + multiple arguments. + `#2043 `_. + +* Changed include directory resolution in ``pybind11/__init__.py`` + and installation in ``setup.py``. This fixes a number of open issues + where pybind11 headers could not be found in certain environments. + `#1995 `_. + +* C++20 ``char8_t`` and ``u8string`` support. `#2026 + `_. + +* CMake: search for Python 3.9. `bb9c91 + `_. + +* Fixes for MSYS-based build environments. + `#2087 `_, + `#2053 `_. + +* STL bindings for ``std::vector<...>::clear``. `#2074 + `_. + +* Read-only flag for ``py::buffer``. `#1466 + `_. + +* Exception handling during module initialization. + `bf2b031 `_. + +* Support linking against a CPython debug build. + `#2025 `_. + +* Fixed issues involving the availability and use of aligned ``new`` and + ``delete``. `#1988 `_, + `759221 `_. + +* Fixed a resource leak upon interpreter shutdown. + `#2020 `_. + +* Fixed error handling in the boolean caster. + `#1976 `_. + +v2.4.3 (Oct 15, 2019) +----------------------------------------------------- + +* Adapt pybind11 to a C API convention change in Python 3.8. `#1950 + `_. + +v2.4.2 (Sep 21, 2019) +----------------------------------------------------- + +* Replaced usage of a C++14 only construct. `#1929 + `_. + +* Made an ifdef future-proof for Python >= 4. `f3109d + `_. + +v2.4.1 (Sep 20, 2019) +----------------------------------------------------- + +* Fixed a problem involving implicit conversion from enumerations to integers + on Python 3.8. `#1780 `_. + +v2.4.0 (Sep 19, 2019) +----------------------------------------------------- + +* Try harder to keep pybind11-internal data structures separate when there + are potential ABI incompatibilities. Fixes crashes that occurred when loading + multiple pybind11 extensions that were e.g. 
compiled by GCC (libstdc++)
+  and Clang (libc++).
+  `#1588 `_ and
+  `c9f5a `_.
+
+* Added support for ``__await__``, ``__aiter__``, and ``__anext__`` protocols.
+  `#1842 `_.
+
+* ``pybind11_add_module()``: don't strip symbols when compiling in
+  ``RelWithDebInfo`` mode. `#1980 `_.
+
+* ``enum_``: Reproduce Python behavior when comparing against invalid values
+  (e.g. ``None``, strings, etc.). Add back support for ``__invert__()``.
+  `#1912 `_,
+  `#1907 `_.
+
+* List insertion operation for ``py::list``.
+  Added ``.empty()`` to all collection types.
+  Added ``py::set::contains()`` and ``py::dict::contains()``.
+  `#1887 `_,
+  `#1884 `_,
+  `#1888 `_.
+
+* ``py::detail::overload_cast_impl`` is available in C++11 mode and can be used
+  like ``overload_cast`` with an additional set of parentheses.
+  `#1581 `_.
+
+* Fixed ``get_include()`` on Conda.
+  `#1877 `_.
+
+* ``stl_bind.h``: negative indexing support.
+  `#1882 `_.
+
+* Minor CMake fix to add MinGW compatibility.
+  `#1851 `_.
+
+* GIL-related fixes.
+  `#1836 `_,
+  `8b90b `_.
+
+* Other very minor/subtle fixes and improvements.
+  `#1329 `_,
+  `#1910 `_,
+  `#1863 `_,
+  `#1847 `_,
+  `#1890 `_,
+  `#1860 `_,
+  `#1848 `_,
+  `#1821 `_,
+  `#1837 `_,
+  `#1833 `_,
+  `#1748 `_,
+  `#1852 `_.
+
+v2.3.0 (June 11, 2019)
+-----------------------------------------------------
+
+* Significantly reduced module binary size (10-20%) when compiled in C++11 mode
+  with GCC/Clang, or in any mode with MSVC. Function signatures are now always
+  precomputed at compile time (this was previously only available in C++14 mode
+  for non-MSVC compilers).
+  `#934 `_.
+
+* Add basic support for tag-based static polymorphism, where classes
+  provide a method that returns the desired type of an instance.
+  `#1326 `_.
+
+* Python type wrappers (``py::handle``, ``py::object``, etc.)
+  now map Python's number protocol onto C++ arithmetic
+  operators such as ``operator+``, ``operator/=``, etc.
+  `#1511 `_.
+
+* A number of improvements related to enumerations:
+
+  1. The ``enum_`` implementation was rewritten from scratch to reduce
+     code bloat. Rather than instantiating a full implementation for each
+     enumeration, most code is now contained in a generic base class.
+     `#1511 `_.
+
+  2. The ``value()`` method of ``py::enum_`` now accepts an optional
+     docstring that will be shown in the documentation of the associated
+     enumeration. `#1160 `_.
+
+  3. Check for an already existing enum value and throw an error if present.
+     `#1453 `_.
+
+* Support for over-aligned type allocation via C++17's aligned ``new``
+  statement. `#1582 `_.
+
+* Added ``py::ellipsis()`` method for slicing of multidimensional NumPy arrays.
+  `#1502 `_.
+
+* Numerous improvements to the ``mkdoc.py`` script for extracting documentation
+  from C++ header files.
+  `#1788 `_.
+
+* ``pybind11_add_module()``: allow including Python as a ``SYSTEM`` include path.
+  `#1416 `_.
+
+* ``pybind11/stl.h`` does not convert strings to ``vector<string>`` anymore.
+  `#1258 `_.
+
+* Mark static methods as such to fix auto-generated Sphinx documentation.
+  `#1732 `_.
+
+* Re-throw forced unwind exceptions (e.g. during pthread termination).
+  `#1208 `_.
+
+* Added ``__contains__`` method to the bindings of maps (``std::map``,
+  ``std::unordered_map``).
+  `#1767 `_.
+
+* Improvements to ``gil_scoped_acquire``.
+  `#1211 `_.
+
+* Type caster support for ``std::deque``.
+  `#1609 `_.
+
+* Support for ``std::unique_ptr`` holders whose deleters differ between a base and derived
+  class. `#1353 `_.
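+
+  For illustration, a minimal sketch of a related custom-deleter holder (the
+  ``MyClass`` name is hypothetical, not taken from this release): binding a
+  type held by ``std::unique_ptr`` with the ``py::nodelete`` deleter, so that
+  Python never destroys instances whose lifetime is managed on the C++ side.
+
+  .. code-block:: cpp
+
+      #include <pybind11/pybind11.h>
+
+      namespace py = pybind11;
+
+      struct MyClass {
+          int value = 42;
+      };
+
+      PYBIND11_MODULE(example, m) {
+          // The holder type (second template argument) carries the deleter
+          py::class_<MyClass, std::unique_ptr<MyClass, py::nodelete>>(m, "MyClass")
+              .def(py::init<>())
+              .def_readwrite("value", &MyClass::value);
+      }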
+
+* Construction of STL array/vector-like data structures from
+  iterators. Added an ``extend()`` operation.
+  `#1709 `_,
+
+* CMake build system improvements for projects that include non-C++
+  files (e.g. plain C, CUDA) in ``pybind11_add_module`` et al.
+  `#1678 `_.
+
+* Fixed asynchronous invocation and deallocation of Python functions
+  wrapped in ``std::function``.
+  `#1595 `_.
+
+* Fixes regarding return value policy propagation in STL type casters.
+  `#1603 `_.
+
+* Fixed scoped enum comparisons.
+  `#1571 `_.
+
+* Fixed iostream redirection for code that releases the GIL.
+  `#1368 `_,
+
+* A number of CI-related fixes.
+  `#1757 `_,
+  `#1744 `_,
+  `#1670 `_.
+
+v2.2.4 (September 11, 2018)
+-----------------------------------------------------
+
+* Use new Python 3.7 Thread Specific Storage (TSS) implementation if available.
+  `#1454 `_,
+  `#1517 `_.
+
+* Fixes for newer MSVC versions and C++17 mode.
+  `#1347 `_,
+  `#1462 `_.
+
+* Propagate return value policies to type-specific casters
+  when casting STL containers.
+  `#1455 `_.
+
+* Allow ostream-redirection of more than 1024 characters.
+  `#1479 `_.
+
+* Set ``Py_DEBUG`` define when compiling against a debug Python build.
+  `#1438 `_.
+
+* Untangle integer logic in number type caster to work for custom
+  types that may only be castable to a restricted set of builtin types.
+  `#1442 `_.
+
+* CMake build system: Remember Python version in cache file.
+  `#1434 `_.
+
+* Fix for custom smart pointers: use ``std::addressof`` to obtain holder
+  address instead of ``operator&``.
+  `#1435 `_.
+
+* Properly report exceptions thrown during module initialization.
+  `#1362 `_.
+
+* Fixed a segmentation fault when creating an empty-shaped NumPy array.
+  `#1371 `_.
+
+* The version of the Intel C++ compiler must be >= 2017, and this is now checked
+  by the header files. `#1363 `_.
+
+* A few minor typo fixes and improvements to the test suite, and
+  patches that silence compiler warnings.
+
+* Vectors now support construction from generators, as well as ``extend()`` from a
+  list or generator.
+  `#1496 `_.
+
+
+v2.2.3 (April 29, 2018)
+-----------------------------------------------------
+
+* The pybind11 header location detection was replaced by a new implementation
+  that no longer depends on ``pip`` internals (the recently released ``pip``
+  10 has restricted access to this API).
+  `#1190 `_.
+
+* Small adjustment to an implementation detail to work around a compiler
+  segmentation fault in Clang 3.3/3.4.
+  `#1350 `_.
+
+* The minimal supported version of the Intel compiler was >= 17.0 since
+  pybind11 v2.1. This check is now explicit, and a compile-time error is raised
+  if the compiler does not meet the requirement.
+  `#1363 `_.
+
+* Fixed an endianness-related fault in the test suite.
+  `#1287 `_.
+
+v2.2.2 (February 7, 2018)
+-----------------------------------------------------
+
+* Fixed a segfault when combining embedded interpreter
+  shutdown/reinitialization with external loaded pybind11 modules.
+  `#1092 `_.
+
+* Eigen support: fixed a bug where Nx1/1xN numpy inputs couldn't be passed as
+  arguments to Eigen vectors (which for Eigen are simply compile-time fixed
+  Nx1/1xN matrices).
+  `#1106 `_.
+
+* Clarified the license by moving the licensing of contributions from
+  ``LICENSE`` into ``CONTRIBUTING.md``: the licensing of contributions is not
+  actually part of the software license as distributed. This isn't meant to be
+  a substantial change in the licensing of the project, but addresses concerns
+  that the clause made the license non-standard.
+ `#1109 `_. + +* Fixed a regression introduced in 2.1 that broke binding functions with lvalue + character literal arguments. + `#1128 `_. + +* MSVC: fix for compilation failures under /permissive-, and added the flag to + the appveyor test suite. + `#1155 `_. + +* Fixed ``__qualname__`` generation, and in turn, fixes how class names + (especially nested class names) are shown in generated docstrings. + `#1171 `_. + +* Updated the FAQ with a suggested project citation reference. + `#1189 `_. + +* Added fixes for deprecation warnings when compiled under C++17 with + ``-Wdeprecated`` turned on, and add ``-Wdeprecated`` to the test suite + compilation flags. + `#1191 `_. + +* Fixed outdated PyPI URLs in ``setup.py``. + `#1213 `_. + +* Fixed a refcount leak for arguments that end up in a ``py::args`` argument + for functions with both fixed positional and ``py::args`` arguments. + `#1216 `_. + +* Fixed a potential segfault resulting from possible premature destruction of + ``py::args``/``py::kwargs`` arguments with overloaded functions. + `#1223 `_. + +* Fixed ``del map[item]`` for a ``stl_bind.h`` bound stl map. + `#1229 `_. + +* Fixed a regression from v2.1.x where the aggregate initialization could + unintentionally end up at a constructor taking a templated + ``std::initializer_list`` argument. + `#1249 `_. + +* Fixed an issue where calling a function with a keep_alive policy on the same + nurse/patient pair would cause the internal patient storage to needlessly + grow (unboundedly, if the nurse is long-lived). + `#1251 `_. + +* Various other minor fixes. + +v2.2.1 (September 14, 2017) +----------------------------------------------------- + +* Added ``py::module_::reload()`` member function for reloading a module. + `#1040 `_. + +* Fixed a reference leak in the number converter. + `#1078 `_. + +* Fixed compilation with Clang on host GCC < 5 (old libstdc++ which isn't fully + C++11 compliant). `#1062 `_. + +* Fixed a regression where the automatic ``std::vector`` caster would + fail to compile. The same fix also applies to any container which returns + element proxies instead of references. + `#1053 `_. + +* Fixed a regression where the ``py::keep_alive`` policy could not be applied + to constructors. `#1065 `_. + +* Fixed a nullptr dereference when loading a ``py::module_local`` type + that's only registered in an external module. + `#1058 `_. + +* Fixed implicit conversion of accessors to types derived from ``py::object``. + `#1076 `_. + +* The ``name`` in ``PYBIND11_MODULE(name, variable)`` can now be a macro. + `#1082 `_. + +* Relaxed overly strict ``py::pickle()`` check for matching get and set types. + `#1064 `_. + +* Conversion errors now try to be more informative when it's likely that + a missing header is the cause (e.g. forgetting ````). + `#1077 `_. + +v2.2.0 (August 31, 2017) +----------------------------------------------------- + +* Support for embedding the Python interpreter. See the + :doc:`documentation page ` for a + full overview of the new features. + `#774 `_, + `#889 `_, + `#892 `_, + `#920 `_. + + .. code-block:: cpp + + #include + namespace py = pybind11; + + int main() { + py::scoped_interpreter guard{}; // start the interpreter and keep it alive + + py::print("Hello, World!"); // use the Python API + } + +* Support for inheriting from multiple C++ bases in Python. + `#693 `_. + + .. 
code-block:: python + + from cpp_module import CppBase1, CppBase2 + + + class PyDerived(CppBase1, CppBase2): + def __init__(self): + CppBase1.__init__(self) # C++ bases must be initialized explicitly + CppBase2.__init__(self) + +* ``PYBIND11_MODULE`` is now the preferred way to create module entry points. + ``PYBIND11_PLUGIN`` is deprecated. See :ref:`macros` for details. + `#879 `_. + + .. code-block:: cpp + + // new + PYBIND11_MODULE(example, m) { + m.def("add", [](int a, int b) { return a + b; }); + } + + // old + PYBIND11_PLUGIN(example) { + py::module m("example"); + m.def("add", [](int a, int b) { return a + b; }); + return m.ptr(); + } + +* pybind11's headers and build system now more strictly enforce hidden symbol + visibility for extension modules. This should be seamless for most users, + but see the :doc:`upgrade` if you use a custom build system. + `#995 `_. + +* Support for ``py::module_local`` types which allow multiple modules to + export the same C++ types without conflicts. This is useful for opaque + types like ``std::vector``. ``py::bind_vector`` and ``py::bind_map`` + now default to ``py::module_local`` if their elements are builtins or + local types. See :ref:`module_local` for details. + `#949 `_, + `#981 `_, + `#995 `_, + `#997 `_. + +* Custom constructors can now be added very easily using lambdas or factory + functions which return a class instance by value, pointer or holder. This + supersedes the old placement-new ``__init__`` technique. + See :ref:`custom_constructors` for details. + `#805 `_, + `#1014 `_. + + .. code-block:: cpp + + struct Example { + Example(std::string); + }; + + py::class_(m, "Example") + .def(py::init()) // existing constructor + .def(py::init([](int n) { // custom constructor + return std::make_unique(std::to_string(n)); + })); + +* Similarly to custom constructors, pickling support functions are now bound + using the ``py::pickle()`` adaptor which improves type safety. See the + :doc:`upgrade` and :ref:`pickling` for details. + `#1038 `_. + +* Builtin support for converting C++17 standard library types and general + conversion improvements: + + 1. C++17 ``std::variant`` is supported right out of the box. C++11/14 + equivalents (e.g. ``boost::variant``) can also be added with a simple + user-defined specialization. See :ref:`cpp17_container_casters` for details. + `#811 `_, + `#845 `_, + `#989 `_. + + 2. Out-of-the-box support for C++17 ``std::string_view``. + `#906 `_. + + 3. Improved compatibility of the builtin ``optional`` converter. + `#874 `_. + + 4. The ``bool`` converter now accepts ``numpy.bool_`` and types which + define ``__bool__`` (Python 3.x) or ``__nonzero__`` (Python 2.7). + `#925 `_. + + 5. C++-to-Python casters are now more efficient and move elements out + of rvalue containers whenever possible. + `#851 `_, + `#936 `_, + `#938 `_. + + 6. Fixed ``bytes`` to ``std::string/char*`` conversion on Python 3. + `#817 `_. + + 7. Fixed lifetime of temporary C++ objects created in Python-to-C++ conversions. + `#924 `_. + +* Scope guard call policy for RAII types, e.g. ``py::call_guard()``, + ``py::call_guard()``. See :ref:`call_policies` for details. + `#740 `_. + +* Utility for redirecting C++ streams to Python (e.g. ``std::cout`` -> + ``sys.stdout``). Scope guard ``py::scoped_ostream_redirect`` in C++ and + a context manager in Python. See :ref:`ostream_redirect`. + `#1009 `_. + +* Improved handling of types and exceptions across module boundaries. + `#915 `_, + `#951 `_, + `#995 `_. 
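+
+As an illustration of the ``py::call_guard`` entry above, a minimal sketch
+(the ``long_running_task`` function is hypothetical, not from these notes):
+the GIL is released for the duration of the wrapped call and reacquired when
+it returns.
+
+.. code-block:: cpp
+
+    #include <pybind11/pybind11.h>
+
+    namespace py = pybind11;
+
+    void long_running_task() {
+        // heavy, C++-only work; must not touch the Python API while
+        // the GIL is released
+    }
+
+    PYBIND11_MODULE(example, m) {
+        // The guard object is constructed before the bound call and
+        // destroyed (GIL reacquired) when the call returns
+        m.def("long_running_task", &long_running_task,
+              py::call_guard<py::gil_scoped_release>());
+    }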
+ +* Fixed destruction order of ``py::keep_alive`` nurse/patient objects + in reference cycles. + `#856 `_. + +* NumPy and buffer protocol related improvements: + + 1. Support for negative strides in Python buffer objects/numpy arrays. This + required changing integers from unsigned to signed for the related C++ APIs. + Note: If you have compiler warnings enabled, you may notice some new conversion + warnings after upgrading. These can be resolved with ``static_cast``. + `#782 `_. + + 2. Support ``std::complex`` and arrays inside ``PYBIND11_NUMPY_DTYPE``. + `#831 `_, + `#832 `_. + + 3. Support for constructing ``py::buffer_info`` and ``py::arrays`` using + arbitrary containers or iterators instead of requiring a ``std::vector``. + `#788 `_, + `#822 `_, + `#860 `_. + + 4. Explicitly check numpy version and require >= 1.7.0. + `#819 `_. + +* Support for allowing/prohibiting ``None`` for specific arguments and improved + ``None`` overload resolution order. See :ref:`none_arguments` for details. + `#843 `_. + `#859 `_. + +* Added ``py::exec()`` as a shortcut for ``py::eval()`` + and support for C++11 raw string literals as input. See :ref:`eval`. + `#766 `_, + `#827 `_. + +* ``py::vectorize()`` ignores non-vectorizable arguments and supports + member functions. + `#762 `_. + +* Support for bound methods as callbacks (``pybind11/functional.h``). + `#815 `_. + +* Allow aliasing pybind11 methods: ``cls.attr("foo") = cls.attr("bar")``. + `#802 `_. + +* Don't allow mixed static/non-static overloads. + `#804 `_. + +* Fixed overriding static properties in derived classes. + `#784 `_. + +* Added support for write only properties. + `#1144 `_. + +* Improved deduction of member functions of a derived class when its bases + aren't registered with pybind11. + `#855 `_. + + .. code-block:: cpp + + struct Base { + int foo() { return 42; } + } + + struct Derived : Base {} + + // Now works, but previously required also binding `Base` + py::class_(m, "Derived") + .def("foo", &Derived::foo); // function is actually from `Base` + +* The implementation of ``py::init<>`` now uses C++11 brace initialization + syntax to construct instances, which permits binding implicit constructors of + aggregate types. `#1015 `_. + + .. code-block:: cpp + + struct Aggregate { + int a; + std::string b; + }; + + py::class_(m, "Aggregate") + .def(py::init()); + +* Fixed issues with multiple inheritance with offset base/derived pointers. + `#812 `_, + `#866 `_, + `#960 `_. + +* Fixed reference leak of type objects. + `#1030 `_. + +* Improved support for the ``/std:c++14`` and ``/std:c++latest`` modes + on MSVC 2017. + `#841 `_, + `#999 `_. + +* Fixed detection of private operator new on MSVC. + `#893 `_, + `#918 `_. + +* Intel C++ compiler compatibility fixes. + `#937 `_. + +* Fixed implicit conversion of ``py::enum_`` to integer types on Python 2.7. + `#821 `_. + +* Added ``py::hash`` to fetch the hash value of Python objects, and + ``.def(hash(py::self))`` to provide the C++ ``std::hash`` as the Python + ``__hash__`` method. + `#1034 `_. + +* Fixed ``__truediv__`` on Python 2 and ``__itruediv__`` on Python 3. + `#867 `_. + +* ``py::capsule`` objects now support the ``name`` attribute. This is useful + for interfacing with ``scipy.LowLevelCallable``. + `#902 `_. + +* Fixed ``py::make_iterator``'s ``__next__()`` for past-the-end calls. + `#897 `_. + +* Added ``error_already_set::matches()`` for checking Python exceptions. + `#772 `_. + +* Deprecated ``py::error_already_set::clear()``. 
It's no longer needed + following a simplification of the ``py::error_already_set`` class. + `#954 `_. + +* Deprecated ``py::handle::operator==()`` in favor of ``py::handle::is()`` + `#825 `_. + +* Deprecated ``py::object::borrowed``/``py::object::stolen``. + Use ``py::object::borrowed_t{}``/``py::object::stolen_t{}`` instead. + `#771 `_. + +* Changed internal data structure versioning to avoid conflicts between + modules compiled with different revisions of pybind11. + `#1012 `_. + +* Additional compile-time and run-time error checking and more informative messages. + `#786 `_, + `#794 `_, + `#803 `_. + +* Various minor improvements and fixes. + `#764 `_, + `#791 `_, + `#795 `_, + `#840 `_, + `#844 `_, + `#846 `_, + `#849 `_, + `#858 `_, + `#862 `_, + `#871 `_, + `#872 `_, + `#881 `_, + `#888 `_, + `#899 `_, + `#928 `_, + `#931 `_, + `#944 `_, + `#950 `_, + `#952 `_, + `#962 `_, + `#965 `_, + `#970 `_, + `#978 `_, + `#979 `_, + `#986 `_, + `#1020 `_, + `#1027 `_, + `#1037 `_. + +* Testing improvements. + `#798 `_, + `#882 `_, + `#898 `_, + `#900 `_, + `#921 `_, + `#923 `_, + `#963 `_. + +v2.1.1 (April 7, 2017) +----------------------------------------------------- + +* Fixed minimum version requirement for MSVC 2015u3 + `#773 `_. + +v2.1.0 (March 22, 2017) +----------------------------------------------------- + +* pybind11 now performs function overload resolution in two phases. The first + phase only considers exact type matches, while the second allows for implicit + conversions to take place. A special ``noconvert()`` syntax can be used to + completely disable implicit conversions for specific arguments. + `#643 `_, + `#634 `_, + `#650 `_. + +* Fixed a regression where static properties no longer worked with classes + using multiple inheritance. The ``py::metaclass`` attribute is no longer + necessary (and deprecated as of this release) when binding classes with + static properties. + `#679 `_, + +* Classes bound using ``pybind11`` can now use custom metaclasses. + `#679 `_, + +* ``py::args`` and ``py::kwargs`` can now be mixed with other positional + arguments when binding functions using pybind11. + `#611 `_. + +* Improved support for C++11 unicode string and character types; added + extensive documentation regarding pybind11's string conversion behavior. + `#624 `_, + `#636 `_, + `#715 `_. + +* pybind11 can now avoid expensive copies when converting Eigen arrays to NumPy + arrays (and vice versa). `#610 `_. + +* The "fast path" in ``py::vectorize`` now works for any full-size group of C or + F-contiguous arrays. The non-fast path is also faster since it no longer performs + copies of the input arguments (except when type conversions are necessary). + `#610 `_. + +* Added fast, unchecked access to NumPy arrays via a proxy object. + `#746 `_. + +* Transparent support for class-specific ``operator new`` and + ``operator delete`` implementations. + `#755 `_. + +* Slimmer and more efficient STL-compatible iterator interface for sequence types. + `#662 `_. + +* Improved custom holder type support. + `#607 `_. + +* ``nullptr`` to ``None`` conversion fixed in various builtin type casters. + `#732 `_. + +* ``enum_`` now exposes its members via a special ``__members__`` attribute. + `#666 `_. + +* ``std::vector`` bindings created using ``stl_bind.h`` can now optionally + implement the buffer protocol. `#488 `_. + +* Automated C++ reference documentation using doxygen and breathe. + `#598 `_. + +* Added minimum compiler version assertions. + `#727 `_. + +* Improved compatibility with C++1z. 
+  `#677 `_.
+
+* Improved ``py::capsule`` API. Can be used to implement cleanup
+  callbacks that are invoked at module destruction time.
+  `#752 `_.
+
+* Various minor improvements and fixes.
+  `#595 `_,
+  `#588 `_,
+  `#589 `_,
+  `#603 `_,
+  `#619 `_,
+  `#648 `_,
+  `#695 `_,
+  `#720 `_,
+  `#723 `_,
+  `#729 `_,
+  `#724 `_,
+  `#742 `_,
+  `#753 `_.
+
+v2.0.1 (Jan 4, 2017)
+-----------------------------------------------------
+
+* Fixed a pointer-to-reference error in ``type_caster`` on MSVC.
+  `#583 `_.
+
+* Fixed a segmentation fault in the test suite due to a typo.
+  `cd7eac `_.
+
+v2.0.0 (Jan 1, 2017)
+-----------------------------------------------------
+
+* Fixed a reference counting regression affecting types with custom metaclasses
+  (introduced in v2.0.0-rc1).
+  `#571 `_.
+
+* Quenched a CMake policy warning.
+  `#570 `_.
+
+v2.0.0-rc1 (Dec 23, 2016)
+-----------------------------------------------------
+
+The pybind11 developers are excited to issue a release candidate of pybind11
+with a subsequent v2.0.0 release planned in early January next year.
+
+An incredible amount of effort went into pybind11 over the last ~5 months,
+leading to a release that is jam-packed with exciting new features and numerous
+usability improvements. The following list links PRs or individual commits
+whenever applicable.
+
+Happy Christmas!
+
+* Support for binding C++ class hierarchies that make use of multiple
+  inheritance. `#410 `_.
+
+* PyPy support: pybind11 now supports nightly builds of PyPy and will
+  interoperate with the future 5.7 release. No code changes are necessary,
+  everything "just" works as usual. Note that we only target the Python 2.7
+  branch for now; support for 3.x will be added once its ``cpyext`` extension
+  support catches up. A few minor features remain unsupported for the time
+  being (notably dynamic attributes in custom types).
+  `#527 `_.
+
+* Significant work on the documentation -- in particular, the monolithic
+  ``advanced.rst`` file was restructured into an easier-to-read hierarchical
+  organization. `#448 `_.
+
+* Many NumPy-related improvements:
+
+  1. Object-oriented API to access and modify NumPy ``ndarray`` instances,
+     replicating much of the corresponding NumPy C API functionality.
+     `#402 `_.
+
+  2. NumPy array ``dtype`` array descriptors are now first-class citizens and
+     are exposed via a new class ``py::dtype``.
+
+  3. Structured dtypes can be registered using the ``PYBIND11_NUMPY_DTYPE()``
+     macro. Special ``array`` constructors accepting dtype objects were also
+     added.
+
+     One potential caveat involving this change: format descriptor strings
+     should now be accessed via ``format_descriptor<T>::format()`` (however, for
+     compatibility purposes, the old syntax ``format_descriptor<T>::value`` will
+     still work for non-structured data types). `#308 `_.
+
+  4. Further improvements to support structured dtypes throughout the system.
+     `#472 `_,
+     `#474 `_,
+     `#459 `_,
+     `#453 `_,
+     `#452 `_, and
+     `#505 `_.
+
+  5. Fast access operators. `#497 `_.
+
+  6. Constructors for arrays whose storage is owned by another object.
+     `#440 `_.
+
+  7. Added constructors for ``array`` and ``array_t`` explicitly accepting shape
+     and strides; if strides are not provided, they are deduced assuming
+     C-contiguity. Also added simplified constructors for the 1-dimensional case.
+
+  8. Added buffer/NumPy support for ``char[N]`` and ``std::array<char, N>`` types.
+
+  9. Added ``memoryview`` wrapper type which is constructible from ``buffer_info``.
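+
+  To make item 3 above concrete, a minimal sketch (the ``Record`` struct and
+  module name are illustrative, not from these release notes): registering a
+  structured dtype for a plain struct and exposing its descriptor.
+
+  .. code-block:: cpp
+
+      #include <pybind11/pybind11.h>
+      #include <pybind11/numpy.h>
+
+      namespace py = pybind11;
+
+      struct Record {
+          int id;
+          double weight;
+      };
+
+      PYBIND11_MODULE(example, m) {
+          // Registers a structured NumPy dtype describing Record's layout
+          PYBIND11_NUMPY_DTYPE(Record, id, weight);
+          // The descriptor is then available as py::dtype::of<Record>()
+          m.def("record_dtype", []() { return py::dtype::of<Record>(); });
+      }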
+
+* Eigen: many additional conversions and support for non-contiguous
+  arrays/slices.
+  `#427 `_,
+  `#315 `_,
+  `#316 `_,
+  `#312 `_, and
+  `#267 `_.
+
+* Incompatible changes in ``class_<...>::class_()``:
+
+  1. Declarations of types that provide access via the buffer protocol must
+     now include the ``py::buffer_protocol()`` annotation as an argument to
+     the ``class_`` constructor.
+
+  2. Declarations of types that require a custom metaclass (i.e. all classes
+     which include static properties via commands such as
+     ``def_readwrite_static()``) must now include the ``py::metaclass()``
+     annotation as an argument to the ``class_`` constructor.
+
+     These two changes were necessary to make type definitions in pybind11
+     future-proof, and to support PyPy via its cpyext mechanism. `#527 `_.
+
+  3. This version of pybind11 uses a redesigned mechanism for instantiating
+     trampoline classes that are used to override virtual methods from within
+     Python. This led to the following user-visible syntax change: instead of
+
+     .. code-block:: cpp
+
+         py::class_<MyClass>("MyClass")
+             .alias<MyTrampoline>()
+             ....
+
+     write
+
+     .. code-block:: cpp
+
+         py::class_<MyClass, MyTrampoline>("MyClass")
+             ....
+
+     Importantly, both the original and the trampoline class are now
+     specified as arguments (in arbitrary order) to the ``py::class_``
+     template, and the ``alias<..>()`` call is gone. The new scheme has zero
+     overhead in cases when Python doesn't override any functions of the
+     underlying C++ class. `rev. 86d825 `_.
+
+* Added ``eval`` and ``eval_file`` functions for evaluating expressions and
+  statements from a string or file. `rev. 0d3fc3 `_.
+
+* pybind11 can now create types with a modifiable dictionary.
+  `#437 `_ and
+  `#444 `_.
+
+* Support for translation of arbitrary C++ exceptions to Python counterparts.
+  `#296 `_ and
+  `#273 `_.
+
+* Report full backtraces through mixed C++/Python code, better reporting for
+  import errors, fixed GIL management in exception processing.
+  `#537 `_,
+  `#494 `_,
+  `rev. e72d95 `_, and
+  `rev. 099d6e `_.
+
+* Support for bit-level operations, comparisons, and serialization of C++
+  enumerations. `#503 `_,
+  `#508 `_,
+  `#380 `_,
+  `#309 `_, and
+  `#311 `_.
+
+* The ``class_`` constructor now accepts its template arguments in any order.
+  `#385 `_.
+
+* Attribute and item accessors now have a more complete interface which makes
+  it possible to chain attributes as in
+  ``obj.attr("a")[key].attr("b").attr("method")(1, 2, 3)``. `#425 `_.
+
+* Major redesign of the default and conversion constructors in ``pytypes.h``.
+  `#464 `_.
+
+* Added built-in support for ``std::shared_ptr`` holder type. It is no longer
+  necessary to include a declaration of the form
+  ``PYBIND11_DECLARE_HOLDER_TYPE(T, std::shared_ptr<T>)`` (though continuing to
+  do so won't cause an error).
+  `#454 `_.
+
+* New ``py::overload_cast`` casting operator to select among multiple possible
+  overloads of a function. An example:
+
+  .. code-block:: cpp
+
+      py::class_<Pet>(m, "Pet")
+          .def("set", py::overload_cast<int>(&Pet::set), "Set the pet's age")
+          .def("set", py::overload_cast<const std::string &>(&Pet::set), "Set the pet's name");
+
+  This feature only works on C++14-capable compilers.
+  `#541 `_.
+
+* C++ types are automatically cast to Python types, e.g. when assigning
+  them as an attribute. For instance, the following is now legal:
+
+  .. code-block:: cpp
+
+      py::module m = /* ... */;
+      m.attr("constant") = 123;
+
+  (Previously, a ``py::cast`` call was necessary to avoid a compilation error.)
+  `#551 `_.
+
+* Redesigned ``pytest``-based test suite.
`#321 `_. + +* Instance tracking to detect reference leaks in test suite. `#324 `_ + +* pybind11 can now distinguish between multiple different instances that are + located at the same memory address, but which have different types. + `#329 `_. + +* Improved logic in ``move`` return value policy. + `#510 `_, + `#297 `_. + +* Generalized unpacking API to permit calling Python functions from C++ using + notation such as ``foo(a1, a2, *args, "ka"_a=1, "kb"_a=2, **kwargs)``. `#372 `_. + +* ``py::print()`` function whose behavior matches that of the native Python + ``print()`` function. `#372 `_. + +* Added ``py::dict`` keyword constructor:``auto d = dict("number"_a=42, + "name"_a="World");``. `#372 `_. + +* Added ``py::str::format()`` method and ``_s`` literal: ``py::str s = "1 + 2 + = {}"_s.format(3);``. `#372 `_. + +* Added ``py::repr()`` function which is equivalent to Python's builtin + ``repr()``. `#333 `_. + +* Improved construction and destruction logic for holder types. It is now + possible to reference instances with smart pointer holder types without + constructing the holder if desired. The ``PYBIND11_DECLARE_HOLDER_TYPE`` + macro now accepts an optional second parameter to indicate whether the holder + type uses intrusive reference counting. + `#533 `_ and + `#561 `_. + +* Mapping a stateless C++ function to Python and back is now "for free" (i.e. + no extra indirections or argument conversion overheads). `rev. 954b79 + `_. + +* Bindings for ``std::valarray``. + `#545 `_. + +* Improved support for C++17 capable compilers. + `#562 `_. + +* Bindings for ``std::optional``. + `#475 `_, + `#476 `_, + `#479 `_, + `#499 `_, and + `#501 `_. + +* ``stl_bind.h``: general improvements and support for ``std::map`` and + ``std::unordered_map``. + `#490 `_, + `#282 `_, + `#235 `_. + +* The ``std::tuple``, ``std::pair``, ``std::list``, and ``std::vector`` type + casters now accept any Python sequence type as input. `rev. 107285 + `_. + +* Improved CMake Python detection on multi-architecture Linux. + `#532 `_. + +* Infrastructure to selectively disable or enable parts of the automatically + generated docstrings. `#486 `_. + +* ``reference`` and ``reference_internal`` are now the default return value + properties for static and non-static properties, respectively. `#473 + `_. (the previous defaults + were ``automatic``). `#473 `_. + +* Support for ``std::unique_ptr`` with non-default deleters or no deleter at + all (``py::nodelete``). `#384 `_. + +* Deprecated ``handle::call()`` method. The new syntax to call Python + functions is simply ``handle()``. It can also be invoked explicitly via + ``handle::operator()``, where ``X`` is an optional return value policy. + +* Print more informative error messages when ``make_tuple()`` or ``cast()`` + fail. `#262 `_. + +* Creation of holder types for classes deriving from + ``std::enable_shared_from_this<>`` now also works for ``const`` values. + `#260 `_. + +* ``make_iterator()`` improvements for better compatibility with various + types (now uses prefix increment operator); it now also accepts iterators + with different begin/end types as long as they are equality comparable. + `#247 `_. + +* ``arg()`` now accepts a wider range of argument types for default values. + `#244 `_. + +* Support ``keep_alive`` where the nurse object may be ``None``. `#341 + `_. + +* Added constructors for ``str`` and ``bytes`` from zero-terminated char + pointers, and from char pointers and length. 
Added constructors for ``str`` + from ``bytes`` and for ``bytes`` from ``str``, which will perform UTF-8 + decoding/encoding as required. + +* Many other improvements of library internals without user-visible changes + + +1.8.1 (July 12, 2016) +---------------------- +* Fixed a rare but potentially very severe issue when the garbage collector ran + during pybind11 type creation. + +1.8.0 (June 14, 2016) +---------------------- +* Redesigned CMake build system which exports a convenient + ``pybind11_add_module`` function to parent projects. +* ``std::vector<>`` type bindings analogous to Boost.Python's ``indexing_suite`` +* Transparent conversion of sparse and dense Eigen matrices and vectors (``eigen.h``) +* Added an ``ExtraFlags`` template argument to the NumPy ``array_t<>`` wrapper + to disable an enforced cast that may lose precision, e.g. to create overloads + for different precisions and complex vs real-valued matrices. +* Prevent implicit conversion of floating point values to integral types in + function arguments +* Fixed incorrect default return value policy for functions returning a shared + pointer +* Don't allow registering a type via ``class_`` twice +* Don't allow casting a ``None`` value into a C++ lvalue reference +* Fixed a crash in ``enum_::operator==`` that was triggered by the ``help()`` command +* Improved detection of whether or not custom C++ types can be copy/move-constructed +* Extended ``str`` type to also work with ``bytes`` instances +* Added a ``"name"_a`` user defined string literal that is equivalent to ``py::arg("name")``. +* When specifying function arguments via ``py::arg``, the test that verifies + the number of arguments now runs at compile time. +* Added ``[[noreturn]]`` attribute to ``pybind11_fail()`` to quench some + compiler warnings +* List function arguments in exception text when the dispatch code cannot find + a matching overload +* Added ``PYBIND11_OVERLOAD_NAME`` and ``PYBIND11_OVERLOAD_PURE_NAME`` macros which + can be used to override virtual methods whose name differs in C++ and Python + (e.g. ``__call__`` and ``operator()``) +* Various minor ``iterator`` and ``make_iterator()`` improvements +* Transparently support ``__bool__`` on Python 2.x and Python 3.x +* Fixed issue with destructor of unpickled object not being called +* Minor CMake build system improvements on Windows +* New ``pybind11::args`` and ``pybind11::kwargs`` types to create functions which + take an arbitrary number of arguments and keyword arguments +* New syntax to call a Python function from C++ using ``*args`` and ``*kwargs`` +* The functions ``def_property_*`` now correctly process docstring arguments (these + formerly caused a segmentation fault) +* Many ``mkdoc.py`` improvements (enumerations, template arguments, ``DOC()`` + macro accepts more arguments) +* Cygwin support +* Documentation improvements (pickling support, ``keep_alive``, macro usage) + +1.7 (April 30, 2016) +---------------------- +* Added a new ``move`` return value policy that triggers C++11 move semantics. 
+ The automatic return value policy falls back to this case whenever a rvalue + reference is encountered +* Significantly more general GIL state routines that are used instead of + Python's troublesome ``PyGILState_Ensure`` and ``PyGILState_Release`` API +* Redesign of opaque types that drastically simplifies their usage +* Extended ability to pass values of type ``[const] void *`` +* ``keep_alive`` fix: don't fail when there is no patient +* ``functional.h``: acquire the GIL before calling a Python function +* Added Python RAII type wrappers ``none`` and ``iterable`` +* Added ``*args`` and ``*kwargs`` pass-through parameters to + ``pybind11.get_include()`` function +* Iterator improvements and fixes +* Documentation on return value policies and opaque types improved + +1.6 (April 30, 2016) +---------------------- +* Skipped due to upload to PyPI gone wrong and inability to recover + (https://github.com/pypa/packaging-problems/issues/74) + +1.5 (April 21, 2016) +---------------------- +* For polymorphic types, use RTTI to try to return the closest type registered with pybind11 +* Pickling support for serializing and unserializing C++ instances to a byte stream in Python +* Added a convenience routine ``make_iterator()`` which turns a range indicated + by a pair of C++ iterators into a iterable Python object +* Added ``len()`` and a variadic ``make_tuple()`` function +* Addressed a rare issue that could confuse the current virtual function + dispatcher and another that could lead to crashes in multi-threaded + applications +* Added a ``get_include()`` function to the Python module that returns the path + of the directory containing the installed pybind11 header files +* Documentation improvements: import issues, symbol visibility, pickling, limitations +* Added casting support for ``std::reference_wrapper<>`` + +1.4 (April 7, 2016) +-------------------------- +* Transparent type conversion for ``std::wstring`` and ``wchar_t`` +* Allow passing ``nullptr``-valued strings +* Transparent passing of ``void *`` pointers using capsules +* Transparent support for returning values wrapped in ``std::unique_ptr<>`` +* Improved docstring generation for compatibility with Sphinx +* Nicer debug error message when default parameter construction fails +* Support for "opaque" types that bypass the transparent conversion layer for STL containers +* Redesigned type casting interface to avoid ambiguities that could occasionally cause compiler errors +* Redesigned property implementation; fixes crashes due to an unfortunate default return value policy +* Anaconda package generation support + +1.3 (March 8, 2016) +-------------------------- + +* Added support for the Intel C++ compiler (v15+) +* Added support for the STL unordered set/map data structures +* Added support for the STL linked list data structure +* NumPy-style broadcasting support in ``pybind11::vectorize`` +* pybind11 now displays more verbose error messages when ``arg::operator=()`` fails +* pybind11 internal data structures now live in a version-dependent namespace to avoid ABI issues +* Many, many bugfixes involving corner cases and advanced usage + +1.2 (February 7, 2016) +-------------------------- + +* Optional: efficient generation of function signatures at compile time using C++14 +* Switched to a simpler and more general way of dealing with function default + arguments. 
Unused keyword arguments in function calls are now detected and
+  cause errors as expected
+* New ``keep_alive`` call policy analogous to Boost.Python's ``with_custodian_and_ward``
+* New ``pybind11::base<>`` attribute to indicate a subclass relationship
+* Improved interface for RAII type wrappers in ``pytypes.h``
+* Use RAII type wrappers consistently within pybind11 itself. This
+  fixes various potential refcount leaks when exceptions occur
+* Added new ``bytes`` RAII type wrapper (maps to ``string`` in Python 2.7)
+* Made handle and related RAII classes const correct, using them more
+  consistently everywhere now
+* Got rid of the ugly ``__pybind11__`` attributes on the Python side---they are
+  now stored in a C++ hash table that is not visible in Python
+* Fixed refcount leaks involving NumPy arrays and bound functions
+* Vastly improved handling of shared/smart pointers
+* Removed an unnecessary copy operation in ``pybind11::vectorize``
+* Fixed naming clashes when both pybind11 and NumPy headers are included
+* Added conversions for additional exception types
+* Documentation improvements (using multiple extension modules, smart pointers,
+  other minor clarifications)
+* Unified infrastructure for parsing variadic arguments in ``class_`` and ``cpp_function``
+* Fixed license text (was: ZLIB, should have been: 3-clause BSD)
+* Python 3.2 compatibility
+* Fixed remaining issues when accessing types in another plugin module
+* Added enum comparison and casting methods
+* Improved SFINAE-based detection of whether types are copy-constructible
+* Eliminated many warnings about unused variables and the use of ``offsetof()``
+* Support for ``std::array<>`` conversions
+
+1.1 (December 7, 2015)
+--------------------------
+
+* Documentation improvements (GIL, wrapping functions, casting, fixed many typos)
+* Generalized conversion of integer types
+* Improved support for casting function objects
+* Improved support for ``std::shared_ptr<>`` conversions
+* Initial support for ``std::set<>`` conversions
+* Fixed type resolution issue for types defined in a separate plugin module
+* CMake build system improvements
+* Factored out generic functionality to non-templated code (smaller code size)
+* Added a code size / compile time benchmark vs Boost.Python
+* Added an appveyor CI script
+
+1.0 (October 15, 2015)
+------------------------
+* Initial release
diff --git a/third_party/pybind11/docs/classes.rst b/third_party/pybind11/docs/classes.rst
new file mode 100644
index 0000000000..0c687b7c53
--- /dev/null
+++ b/third_party/pybind11/docs/classes.rst
@@ -0,0 +1,541 @@
+.. _classes:
+
+Object-oriented code
+####################
+
+Creating bindings for a custom type
+===================================
+
+Let's now look at a more complex example where we'll create bindings for a
+custom C++ data structure named ``Pet``. Its definition is given below:
+
+.. code-block:: cpp
+
+    struct Pet {
+        Pet(const std::string &name) : name(name) { }
+        void setName(const std::string &name_) { name = name_; }
+        const std::string &getName() const { return name; }
+
+        std::string name;
+    };
+
+The binding code for ``Pet`` looks as follows:
+
+.. code-block:: cpp
+
+    #include <pybind11/pybind11.h>
+
+    namespace py = pybind11;
+
+    PYBIND11_MODULE(example, m) {
+        py::class_<Pet>(m, "Pet")
+            .def(py::init<const std::string &>())
+            .def("setName", &Pet::setName)
+            .def("getName", &Pet::getName);
+    }
+
+:class:`class_` creates bindings for a C++ *class* or *struct*-style data
+structure.
:func:`init` is a convenience function that takes the types of a
+constructor's parameters as template arguments and wraps the corresponding
+constructor (see the :ref:`custom_constructors` section for details). An
+interactive Python session demonstrating this example is shown below:
+
+.. code-block:: pycon
+
+    % python
+    >>> import example
+    >>> p = example.Pet("Molly")
+    >>> print(p)
+    <example.Pet object at 0x10cd98060>
+    >>> p.getName()
+    'Molly'
+    >>> p.setName("Charly")
+    >>> p.getName()
+    'Charly'
+
+.. seealso::
+
+    Static member functions can be bound in the same way using
+    :func:`class_::def_static`.
+
+Keyword and default arguments
+=============================
+It is possible to specify keyword and default arguments using the syntax
+discussed in the previous chapter. Refer to the sections :ref:`keyword_args`
+and :ref:`default_args` for details.
+
+Binding lambda functions
+========================
+
+Note how ``print(p)`` produced a rather useless summary of our data structure in the example above:
+
+.. code-block:: pycon
+
+    >>> print(p)
+    <example.Pet object at 0x10cd98060>
+
+To address this, we could bind a utility function that returns a human-readable
+summary to the special method slot named ``__repr__``. Unfortunately, there is no
+suitable functionality in the ``Pet`` data structure, and it would be nice if
+we did not have to change it. This can easily be accomplished by binding a
+lambda function instead:
+
+.. code-block:: cpp
+
+    py::class_<Pet>(m, "Pet")
+        .def(py::init<const std::string &>())
+        .def("setName", &Pet::setName)
+        .def("getName", &Pet::getName)
+        .def("__repr__",
+            [](const Pet &a) {
+                return "<example.Pet named '" + a.name + "'>";
+            }
+        );
+
+Both stateless [#f1]_ and stateful lambda closures are supported by pybind11.
+With the above change, the same Python code now produces the following output:
+
+.. code-block:: pycon
+
+    >>> print(p)
+    <example.Pet named 'Molly'>
+
+.. [#f1] Stateless closures are those with an empty pair of brackets ``[]`` as the capture object.
+
+.. _properties:
+
+Instance and static fields
+==========================
+
+We can also directly expose the ``name`` field using the
+:func:`class_::def_readwrite` method. A similar :func:`class_::def_readonly`
+method also exists for ``const`` fields.
+
+.. code-block:: cpp
+
+    py::class_<Pet>(m, "Pet")
+        .def(py::init<const std::string &>())
+        .def_readwrite("name", &Pet::name)
+        // ... remainder ...
+
+This makes it possible to write
+
+.. code-block:: pycon
+
+    >>> p = example.Pet("Molly")
+    >>> p.name
+    'Molly'
+    >>> p.name = "Charly"
+    >>> p.name
+    'Charly'
+
+Now suppose that ``Pet::name`` was a private internal variable
+that can only be accessed via setters and getters.
+
+.. code-block:: cpp
+
+    class Pet {
+    public:
+        Pet(const std::string &name) : name(name) { }
+        void setName(const std::string &name_) { name = name_; }
+        const std::string &getName() const { return name; }
+    private:
+        std::string name;
+    };
+
+In this case, the method :func:`class_::def_property`
+(:func:`class_::def_property_readonly` for read-only data) can be used to
+provide a field-like interface within Python that will transparently call
+the setter and getter functions:
+
+.. code-block:: cpp
+
+    py::class_<Pet>(m, "Pet")
+        .def(py::init<const std::string &>())
+        .def_property("name", &Pet::getName, &Pet::setName)
+        // ... remainder ...
+
+Write-only properties can be defined by passing ``nullptr`` as the
+input for the read function.
+
+.. seealso::
+
+    Similar functions :func:`class_::def_readwrite_static`,
+    :func:`class_::def_readonly_static`, :func:`class_::def_property_static`,
+    and :func:`class_::def_property_readonly_static` are provided for binding
+    static variables and properties.
Please also see the section on
+:ref:`static_properties` in the advanced part of the documentation.
+
+Dynamic attributes
+==================
+
+Native Python classes can pick up new attributes dynamically:
+
+.. code-block:: pycon
+
+    >>> class Pet:
+    ...     name = "Molly"
+    ...
+    >>> p = Pet()
+    >>> p.name = "Charly"  # overwrite existing
+    >>> p.age = 2  # dynamically add a new attribute
+
+By default, classes exported from C++ do not support this and the only writable
+attributes are the ones explicitly defined using :func:`class_::def_readwrite`
+or :func:`class_::def_property`.
+
+.. code-block:: cpp
+
+    py::class_<Pet>(m, "Pet")
+        .def(py::init<>())
+        .def_readwrite("name", &Pet::name);
+
+Trying to set any other attribute results in an error:
+
+.. code-block:: pycon
+
+    >>> p = example.Pet()
+    >>> p.name = "Charly"  # OK, attribute defined in C++
+    >>> p.age = 2  # fail
+    AttributeError: 'Pet' object has no attribute 'age'
+
+To enable dynamic attributes for C++ classes, the :class:`py::dynamic_attr` tag
+must be added to the :class:`py::class_` constructor:
+
+.. code-block:: cpp
+
+    py::class_<Pet>(m, "Pet", py::dynamic_attr())
+        .def(py::init<>())
+        .def_readwrite("name", &Pet::name);
+
+Now everything works as expected:
+
+.. code-block:: pycon
+
+    >>> p = example.Pet()
+    >>> p.name = "Charly"  # OK, overwrite value in C++
+    >>> p.age = 2  # OK, dynamically add a new attribute
+    >>> p.__dict__  # just like a native Python class
+    {'age': 2}
+
+Note that there is a small runtime cost for a class with dynamic attributes.
+Not only because of the addition of a ``__dict__``, but also because of more
+expensive garbage collection tracking which must be activated to resolve
+possible circular references. Native Python classes incur this same cost by
+default, so this is not anything to worry about. By default, pybind11 classes
+are more efficient than native Python classes. Enabling dynamic attributes
+just brings them on par.
+
+.. _inheritance:
+
+Inheritance and automatic downcasting
+=====================================
+
+Suppose now that the example consists of two data structures with an
+inheritance relationship:
+
+.. code-block:: cpp
+
+    struct Pet {
+        Pet(const std::string &name) : name(name) { }
+        std::string name;
+    };
+
+    struct Dog : Pet {
+        Dog(const std::string &name) : Pet(name) { }
+        std::string bark() const { return "woof!"; }
+    };
+
+There are two different ways of indicating a hierarchical relationship to
+pybind11: the first specifies the C++ base class as an extra template
+parameter of the :class:`class_`:
+
+.. code-block:: cpp
+
+    py::class_<Pet>(m, "Pet")
+        .def(py::init<const std::string &>())
+        .def_readwrite("name", &Pet::name);
+
+    // Method 1: template parameter:
+    py::class_<Dog, Pet>(m, "Dog")
+        .def(py::init<const std::string &>())
+        .def("bark", &Dog::bark);
+
+Alternatively, we can also assign a name to the previously bound ``Pet``
+:class:`class_` object and reference it when binding the ``Dog`` class:
+
+.. code-block:: cpp
+
+    py::class_<Pet> pet(m, "Pet");
+    pet.def(py::init<const std::string &>())
+        .def_readwrite("name", &Pet::name);
+
+    // Method 2: pass parent class_ object:
+    py::class_<Dog>(m, "Dog", pet /* <- specify Python parent type */)
+        .def(py::init<const std::string &>())
+        .def("bark", &Dog::bark);
+
+Functionality-wise, both approaches are equivalent. Afterwards, instances will
+expose fields and methods of both types:
+
+.. code-block:: pycon
+
+    >>> p = example.Dog("Molly")
+    >>> p.name
+    'Molly'
+    >>> p.bark()
+    'woof!'
+
+The C++ classes defined above are regular non-polymorphic types with an
+inheritance relationship.
This is reflected in Python:
+
+.. code-block:: cpp
+
+    // Return a base pointer to a derived instance
+    m.def("pet_store", []() { return std::unique_ptr<Pet>(new Dog("Molly")); });
+
+.. code-block:: pycon
+
+    >>> p = example.pet_store()
+    >>> type(p)  # `Dog` instance behind `Pet` pointer
+    Pet  # no pointer downcasting for regular non-polymorphic types
+    >>> p.bark()
+    AttributeError: 'Pet' object has no attribute 'bark'
+
+The function returned a ``Dog`` instance, but because it's a non-polymorphic
+type behind a base pointer, Python only sees a ``Pet``. In C++, a type is only
+considered polymorphic if it has at least one virtual function and pybind11
+will automatically recognize this:
+
+.. code-block:: cpp
+
+    struct PolymorphicPet {
+        virtual ~PolymorphicPet() = default;
+    };
+
+    struct PolymorphicDog : PolymorphicPet {
+        std::string bark() const { return "woof!"; }
+    };
+
+    // Same binding code
+    py::class_<PolymorphicPet>(m, "PolymorphicPet");
+    py::class_<PolymorphicDog, PolymorphicPet>(m, "PolymorphicDog")
+        .def(py::init<>())
+        .def("bark", &PolymorphicDog::bark);
+
+    // Again, return a base pointer to a derived instance
+    m.def("pet_store2", []() { return std::unique_ptr<PolymorphicPet>(new PolymorphicDog); });
+
+.. code-block:: pycon
+
+    >>> p = example.pet_store2()
+    >>> type(p)
+    PolymorphicDog  # automatically downcast
+    >>> p.bark()
+    'woof!'
+
+Given a pointer to a polymorphic base, pybind11 performs automatic downcasting
+to the actual derived type. Note that this goes beyond the usual situation in
+C++: we don't just get access to the virtual functions of the base, we get the
+concrete derived type including functions and attributes that the base type may
+not even be aware of.
+
+.. seealso::
+
+    For more information about polymorphic behavior see :ref:`overriding_virtuals`.
+
+
+Overloaded methods
+==================
+
+Sometimes there are several overloaded C++ methods with the same name taking
+different kinds of input arguments:
+
+.. code-block:: cpp
+
+    struct Pet {
+        Pet(const std::string &name, int age) : name(name), age(age) { }
+
+        void set(int age_) { age = age_; }
+        void set(const std::string &name_) { name = name_; }
+
+        std::string name;
+        int age;
+    };
+
+Attempting to bind ``Pet::set`` will cause an error since the compiler does not
+know which method the user intended to select. We can disambiguate by casting
+them to function pointers. Binding multiple functions to the same Python name
+automatically creates a chain of function overloads that will be tried in
+sequence.
+
+.. code-block:: cpp
+
+    py::class_<Pet>(m, "Pet")
+        .def(py::init<const std::string &, int>())
+        .def("set", static_cast<void (Pet::*)(int)>(&Pet::set), "Set the pet's age")
+        .def("set", static_cast<void (Pet::*)(const std::string &)>(&Pet::set), "Set the pet's name");
+
+The overload signatures are also visible in the method's docstring:
+
+.. code-block:: pycon
+
+    >>> help(example.Pet)
+
+    class Pet(__builtin__.object)
+     |  Methods defined here:
+     |
+     |  __init__(...)
+     |      Signature : (Pet, str, int) -> NoneType
+     |
+     |  set(...)
+     |      1. Signature : (Pet, int) -> NoneType
+     |
+     |      Set the pet's age
+     |
+     |      2. Signature : (Pet, str) -> NoneType
+     |
+     |      Set the pet's name
+
+If you have a C++14 compatible compiler [#cpp14]_, you can use an alternative
+syntax to cast the overloaded function:
+
+.. code-block:: cpp
+
+    py::class_<Pet>(m, "Pet")
+        .def("set", py::overload_cast<int>(&Pet::set), "Set the pet's age")
+        .def("set", py::overload_cast<const std::string &>(&Pet::set), "Set the pet's name");
+
+Here, ``py::overload_cast`` only requires the parameter types to be specified.
+The return type and class are deduced.
This avoids the additional noise of
+``void (Pet::*)()`` as seen in the raw cast. If a function is overloaded based
+on constness, the ``py::const_`` tag should be used:
+
+.. code-block:: cpp
+
+    struct Widget {
+        int foo(int x, float y);
+        int foo(int x, float y) const;
+    };
+
+    py::class_<Widget>(m, "Widget")
+        .def("foo_mutable", py::overload_cast<int, float>(&Widget::foo))
+        .def("foo_const", py::overload_cast<int, float>(&Widget::foo, py::const_));
+
+If you prefer the ``py::overload_cast`` syntax but have a C++11 compatible compiler only,
+you can use ``py::detail::overload_cast_impl`` with an additional set of parentheses:
+
+.. code-block:: cpp
+
+    template <typename... Args>
+    using overload_cast_ = pybind11::detail::overload_cast_impl<Args...>;
+
+    py::class_<Pet>(m, "Pet")
+        .def("set", overload_cast_<int>()(&Pet::set), "Set the pet's age")
+        .def("set", overload_cast_<const std::string &>()(&Pet::set), "Set the pet's name");
+
+.. [#cpp14] A compiler which supports the ``-std=c++14`` flag.
+
+.. note::
+
+    To define multiple overloaded constructors, simply declare one after the
+    other using the ``.def(py::init<...>())`` syntax. The existing machinery
+    for specifying keyword and default arguments also works.
+
+Enumerations and internal types
+===============================
+
+Let's now suppose that the example class contains internal types like enumerations, e.g.:
+
+.. code-block:: cpp
+
+    struct Pet {
+        enum Kind {
+            Dog = 0,
+            Cat
+        };
+
+        struct Attributes {
+            float age = 0;
+        };
+
+        Pet(const std::string &name, Kind type) : name(name), type(type) { }
+
+        std::string name;
+        Kind type;
+        Attributes attr;
+    };
+
+The binding code for this example looks as follows:
+
+.. code-block:: cpp
+
+    py::class_<Pet> pet(m, "Pet");
+
+    pet.def(py::init<const std::string &, Pet::Kind>())
+        .def_readwrite("name", &Pet::name)
+        .def_readwrite("type", &Pet::type)
+        .def_readwrite("attr", &Pet::attr);
+
+    py::enum_<Pet::Kind>(pet, "Kind")
+        .value("Dog", Pet::Kind::Dog)
+        .value("Cat", Pet::Kind::Cat)
+        .export_values();
+
+    py::class_<Pet::Attributes>(pet, "Attributes")
+        .def(py::init<>())
+        .def_readwrite("age", &Pet::Attributes::age);
+
+
+To ensure that the nested types ``Kind`` and ``Attributes`` are created within the scope of ``Pet``, the
+``pet`` :class:`class_` instance must be supplied to the :class:`enum_` and :class:`class_`
+constructor. The :func:`enum_::export_values` function exports the enum entries
+into the parent scope, which should be skipped for newer C++11-style strongly
+typed enums.
+
+.. code-block:: pycon
+
+    >>> p = Pet("Lucy", Pet.Cat)
+    >>> p.type
+    Kind.Cat
+    >>> int(p.type)
+    1L
+
+The entries defined by the enumeration type are exposed in the ``__members__`` property:
+
+.. code-block:: pycon
+
+    >>> Pet.Kind.__members__
+    {'Dog': Kind.Dog, 'Cat': Kind.Cat}
+
+The ``name`` property returns the name of the enum value as a unicode string.
+
+.. note::
+
+    It is also possible to use ``str(enum)``, however these accomplish different
+    goals. The following shows how these two approaches differ.
+
+    .. code-block:: pycon
+
+        >>> p = Pet("Lucy", Pet.Cat)
+        >>> pet_type = p.type
+        >>> pet_type
+        Pet.Cat
+        >>> str(pet_type)
+        'Pet.Cat'
+        >>> pet_type.name
+        'Cat'
+
+.. note::
+
+    When the special tag ``py::arithmetic()`` is specified to the ``enum_``
+    constructor, pybind11 creates an enumeration that also supports rudimentary
+    arithmetic and bit-level operations like comparisons, and, or, xor, negation,
+    etc.
+
+    .. code-block:: cpp
+
+        py::enum_<Pet::Kind>(pet, "Kind", py::arithmetic())
+            ...
+
+    By default, these are omitted to conserve space.
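+
+For illustration, a minimal sketch of the arithmetic variant (using the
+``Pet::Kind`` names from this section; the resulting Python-side behavior is
+shown in comments):
+
+.. code-block:: cpp
+
+    py::enum_<Pet::Kind>(pet, "Kind", py::arithmetic())
+        .value("Dog", Pet::Kind::Dog)
+        .value("Cat", Pet::Kind::Cat)
+        .export_values();
+
+    // With py::arithmetic(), Python expressions such as
+    //   Pet.Kind.Dog < Pet.Kind.Cat
+    //   int(Pet.Kind.Dog | Pet.Kind.Cat)
+    // are valid instead of raising a TypeError.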
diff --git a/third_party/pybind11/docs/cmake/index.rst b/third_party/pybind11/docs/cmake/index.rst new file mode 100644 index 0000000000..eaf66d70f3 --- /dev/null +++ b/third_party/pybind11/docs/cmake/index.rst @@ -0,0 +1,8 @@ +CMake helpers +------------- + +Pybind11 can be used with ``add_subdirectory(extern/pybind11)``, or from an +install with ``find_package(pybind11 CONFIG)``. The interface provided in +either case is functionally identical. + +.. cmake-module:: ../../tools/pybind11Config.cmake.in diff --git a/third_party/pybind11/docs/compiling.rst b/third_party/pybind11/docs/compiling.rst new file mode 100644 index 0000000000..2b543be0be --- /dev/null +++ b/third_party/pybind11/docs/compiling.rst @@ -0,0 +1,638 @@ +.. _compiling: + +Build systems +############# + +.. _build-setuptools: + +Building with setuptools +======================== + +For projects on PyPI, building with setuptools is the way to go. Sylvain Corlay +has kindly provided an example project which shows how to set up everything, +including automatic generation of documentation using Sphinx. Please refer to +the [python_example]_ repository. + +.. [python_example] https://github.com/pybind/python_example + +A helper file is provided with pybind11 that can simplify usage with setuptools. + +To use pybind11 inside your ``setup.py``, you have to have some system to +ensure that ``pybind11`` is installed when you build your package. There are +four possible ways to do this, and pybind11 supports all four: You can ask all +users to install pybind11 beforehand (bad), you can use +:ref:`setup_helpers-pep518` (good, but very new and requires Pip 10), +:ref:`setup_helpers-setup_requires` (discouraged by Python packagers now that +PEP 518 is available, but it still works everywhere), or you can +:ref:`setup_helpers-copy-manually` (always works but you have to manually sync +your copy to get updates). + +An example of a ``setup.py`` using pybind11's helpers: + +.. code-block:: python + + from glob import glob + from setuptools import setup + from pybind11.setup_helpers import Pybind11Extension + + ext_modules = [ + Pybind11Extension( + "python_example", + sorted(glob("src/*.cpp")), # Sort source files for reproducibility + ), + ] + + setup(..., ext_modules=ext_modules) + +If you want to do an automatic search for the highest supported C++ standard, +that is supported via a ``build_ext`` command override; it will only affect +``Pybind11Extensions``: + +.. code-block:: python + + from glob import glob + from setuptools import setup + from pybind11.setup_helpers import Pybind11Extension, build_ext + + ext_modules = [ + Pybind11Extension( + "python_example", + sorted(glob("src/*.cpp")), + ), + ] + + setup(..., cmdclass={"build_ext": build_ext}, ext_modules=ext_modules) + +If you have single-file extension modules that are directly stored in the +Python source tree (``foo.cpp`` in the same directory as where a ``foo.py`` +would be located), you can also generate ``Pybind11Extensions`` using +``setup_helpers.intree_extensions``: ``intree_extensions(["path/to/foo.cpp", +...])`` returns a list of ``Pybind11Extensions`` which can be passed to +``ext_modules``, possibly after further customizing their attributes +(``libraries``, ``include_dirs``, etc.). By doing so, a ``foo.*.so`` extension +module will be generated and made available upon installation. 
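+
+For context, a minimal sketch of such a single-file module (the ``foo.cpp``
+name mirrors the hypothetical layout described above):
+
+.. code-block:: cpp
+
+    // foo.cpp: lives next to foo.py in the Python source tree; after
+    // building and installing, it becomes importable as `foo`
+    #include <pybind11/pybind11.h>
+
+    PYBIND11_MODULE(foo, m) {
+        m.def("square", [](double x) { return x * x; });
+    }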
``intree_extensions`` will automatically detect if you are using a ``src``-style
layout (as long as no namespace packages are involved), but you can also
explicitly pass ``package_dir`` to it (as in ``setuptools.setup``).

Since pybind11 does not require NumPy when building, a light-weight replacement
for NumPy's parallel compilation distutils tool is included. Use it like this:

.. code-block:: python

    from pybind11.setup_helpers import ParallelCompile

    # Optional multithreaded build
    ParallelCompile("NPY_NUM_BUILD_JOBS").install()

    setup(...)

The argument is the name of an environment variable to control the number of
threads, such as ``NPY_NUM_BUILD_JOBS`` (as used by NumPy), though you can set
something different if you want; ``CMAKE_BUILD_PARALLEL_LEVEL`` is another
choice a user might expect. You can also pass ``default=N`` to set the default
number of threads (0 will take the number of threads available) and ``max=N``,
the maximum number of threads; if you have a large extension you may want to
set this to a memory-dependent number.

If you are developing rapidly and have a lot of C++ files, you may want to
avoid rebuilding files that have not changed. For simple cases where you are
using ``pip install -e .`` and do not have local headers, you can skip the
rebuild if an object file is newer than its source (headers are not checked!)
with the following:

.. code-block:: python

    from pybind11.setup_helpers import ParallelCompile, naive_recompile

    ParallelCompile("NPY_NUM_BUILD_JOBS", needs_recompile=naive_recompile).install()

If you have a more complex build, you can implement a smarter function and pass
it to ``needs_recompile``, or you can use [Ccache]_ instead. ``CXX="ccache g++"
pip install -e .`` would be the way to use it with GCC, for example. Unlike the
simple solution, this works even when not compiling in editable mode, but it
does require Ccache to be installed.

Keep in mind that Pip will not even attempt to rebuild if it thinks it has
already built a copy of your code, which it deduces from the version number.
One way to avoid this is to use [setuptools_scm]_, which will generate a
version number that includes the number of commits since your last tag and a
hash for a dirty directory. Another way to force a rebuild is to purge your
cache or use Pip's ``--no-cache-dir`` option.

.. [Ccache] https://ccache.dev

.. [setuptools_scm] https://github.com/pypa/setuptools_scm

.. _setup_helpers-pep518:

PEP 518 requirements (Pip 10+ required)
---------------------------------------

If you use `PEP 518's <https://www.python.org/dev/peps/pep-0518/>`_
``pyproject.toml`` file, you can ensure that ``pybind11`` is available during
the compilation of your project. When this file exists, Pip will make a new
virtual environment, download just the packages listed here in ``requires=``,
and build a wheel (binary Python package). It will then throw away the
environment, and install your wheel.

Your ``pyproject.toml`` file will likely look something like this:

.. code-block:: toml

    [build-system]
    requires = ["setuptools>=42", "wheel", "pybind11~=2.6.1"]
    build-backend = "setuptools.build_meta"

.. note::

    The main drawback to this method is that a `PEP 517`_ compliant build tool,
    such as Pip 10+, is required for this approach to work; older versions of
    Pip completely ignore this file.
    If you distribute binaries (called wheels in Python) using something like
    `cibuildwheel`_, remember that ``setup.py`` and ``pyproject.toml`` are not
    even contained in the wheel, so this high Pip requirement is only for
    source builds, and will not affect users of your binary wheels. If you are
    building SDists and wheels, then `pypa-build`_ is the recommended official
    tool.

.. _PEP 517: https://www.python.org/dev/peps/pep-0517/
.. _cibuildwheel: https://cibuildwheel.readthedocs.io
.. _pypa-build: https://pypa-build.readthedocs.io/en/latest/

.. _setup_helpers-setup_requires:

Classic ``setup_requires``
--------------------------

If you want to support old versions of Pip with the classic
``setup_requires=["pybind11"]`` keyword argument to setup, which triggers a
two-phase ``setup.py`` run, then you will need to use something like this to
ensure the first pass works (which has not yet installed the ``setup_requires``
packages, since it can't install something it does not know about):

.. code-block:: python

    try:
        from pybind11.setup_helpers import Pybind11Extension
    except ImportError:
        from setuptools import Extension as Pybind11Extension

It doesn't matter that the Extension class is not the enhanced subclass for the
first-pass run; the second pass will have the ``setup_requires`` requirements
available.

This is obviously more of a hack than the PEP 518 method, but it supports
ancient versions of Pip.

.. _setup_helpers-copy-manually:

Copy manually
-------------

You can also copy ``setup_helpers.py`` directly to your project; it was
designed to be usable standalone, like the old example ``setup.py``. You can
set ``include_pybind11=False`` to skip including the pybind11 package headers,
so you can use it with git submodules and a specific git version. If you use
this, you will need to import from a local file in ``setup.py`` and ensure the
helper file is part of your MANIFEST.

Closely related, if you include pybind11 as a subproject, you can run the
``setup_helpers.py`` in place. If loaded correctly, this should even pick up
the correct include for pybind11, though you can turn it off as shown above if
you want to supply it manually.

Suggested usage if you have pybind11 as a submodule in ``extern/pybind11``:

.. code-block:: python

    DIR = os.path.abspath(os.path.dirname(__file__))

    sys.path.append(os.path.join(DIR, "extern", "pybind11"))
    from pybind11.setup_helpers import Pybind11Extension  # noqa: E402

    del sys.path[-1]

.. versionchanged:: 2.6

    Added ``setup_helpers`` file.

Building with cppimport
=======================

[cppimport]_ is a small Python import hook that determines whether there is a
C++ source file whose name matches the requested module. If there is, the file
is compiled as a Python extension using pybind11 and placed in the same folder
as the C++ source file. Python is then able to find the module and load it.

.. [cppimport] https://github.com/tbenthompson/cppimport

.. _cmake:

Building with CMake
===================

For C++ codebases that have an existing CMake-based build system, a Python
extension module can be created with just a few lines of code:

.. code-block:: cmake

    cmake_minimum_required(VERSION 3.4...3.18)
    project(example LANGUAGES CXX)

    add_subdirectory(pybind11)
    pybind11_add_module(example example.cpp)

This assumes that the pybind11 repository is located in a subdirectory named
:file:`pybind11` and that the code is located in a file named
:file:`example.cpp`. The CMake command ``add_subdirectory`` will import the
pybind11 project which provides the ``pybind11_add_module`` function. It will
take care of all the details needed to build a Python extension module on any
platform.

A working sample project, including a way to invoke CMake from
:file:`setup.py` for PyPI integration, can be found in the [cmake_example]_
repository.

.. [cmake_example] https://github.com/pybind/cmake_example

.. versionchanged:: 2.6
    CMake 3.4+ is required.

Further information can be found at :doc:`cmake/index`.

pybind11_add_module
-------------------

To ease the creation of Python extension modules, pybind11 provides a CMake
function with the following signature:

.. code-block:: cmake

    pybind11_add_module(<name> [MODULE | SHARED] [EXCLUDE_FROM_ALL]
                        [NO_EXTRAS] [THIN_LTO] [OPT_SIZE] source1 [source2 ...])

This function behaves very much like CMake's builtin ``add_library`` (in fact,
it's a wrapper function around that command). It will add a library target
called ``<name>`` to be built from the listed source files. In addition, it
will take care of all the Python-specific compiler and linker flags as well
as the OS- and Python-version-specific file extension. The produced target
``<name>`` can be further manipulated with regular CMake commands.

``MODULE`` or ``SHARED`` may be given to specify the type of library. If no
type is given, ``MODULE`` is used by default, which ensures the creation of a
Python-exclusive module. Specifying ``SHARED`` will create a more traditional
dynamic library which can also be linked from elsewhere. ``EXCLUDE_FROM_ALL``
removes this target from the default build (see CMake docs for details).

Since pybind11 is a template library, ``pybind11_add_module`` adds compiler
flags to ensure high quality code generation without bloat arising from long
symbol names and duplication of code in different translation units. It
sets default visibility to *hidden*, which is required for some pybind11
features and functionality when attempting to load multiple pybind11 modules
compiled under different pybind11 versions. It also adds additional flags
enabling LTO (Link Time Optimization) and strips unneeded symbols. See the
:ref:`FAQ entry <faq:symhidden>` for a more detailed explanation. These
latter optimizations are never applied in ``Debug`` mode. If ``NO_EXTRAS`` is
given, they will always be disabled, even in ``Release`` mode. However, this
will result in code bloat and is generally not recommended.

As stated above, LTO is enabled by default. Some newer compilers also support
different flavors of LTO such as `ThinLTO`_. Setting ``THIN_LTO`` will cause
the function to prefer this flavor if available. The function falls back to
regular LTO if ``-flto=thin`` is not available. If
``CMAKE_INTERPROCEDURAL_OPTIMIZATION`` is set (either ``ON`` or ``OFF``), then
that will be respected instead of the built-in flag search.

.. note::

    If you want to set the property form on targets or the
    ``CMAKE_INTERPROCEDURAL_OPTIMIZATION_<CONFIG>`` versions of this, you
    should still use ``set(CMAKE_INTERPROCEDURAL_OPTIMIZATION OFF)`` (otherwise
    a no-op) to disable pybind11's IPO flags.
The ``OPT_SIZE`` flag enables size-based optimization equivalent to the
standard ``/Os`` or ``-Os`` compiler flags and the ``MinSizeRel`` build type,
which avoid optimizations that can substantially increase the size of the
resulting binary. This flag is particularly useful in projects that are split
into performance-critical parts and associated bindings. In this case, we can
compile the project in release mode (and hence, optimize performance globally),
and specify ``OPT_SIZE`` for the binding target, where size might be the main
concern as performance is often less critical here. A ~25% size reduction has
been observed in practice. This flag only changes the optimization behavior at
a per-target level and takes precedence over the global CMake build type
(``Release``, ``RelWithDebInfo``) except for ``Debug`` builds, where
optimizations remain disabled.

.. _ThinLTO: http://clang.llvm.org/docs/ThinLTO.html

Configuration variables
-----------------------

By default, pybind11 will compile modules with the compiler default or the
minimum standard required by pybind11, whichever is higher. You can set the
standard explicitly with
`CMAKE_CXX_STANDARD <https://cmake.org/cmake/help/latest/variable/CMAKE_CXX_STANDARD.html>`_:

.. code-block:: cmake

    set(CMAKE_CXX_STANDARD 14 CACHE STRING "C++ version selection")  # or 11, 14, 17, 20
    set(CMAKE_CXX_STANDARD_REQUIRED ON)  # optional, ensure standard is supported
    set(CMAKE_CXX_EXTENSIONS OFF)  # optional, keep compiler extensions off

The variables can also be set when calling CMake from the command line using
the ``-D<variable>=<value>`` flag. You can also manually set ``CXX_STANDARD``
on a target or use ``target_compile_features`` on your targets -- anything that
CMake supports.

Classic Python support: The target Python version can be selected by setting
``PYBIND11_PYTHON_VERSION`` or an exact Python installation can be specified
with ``PYTHON_EXECUTABLE``. For example:

.. code-block:: bash

    cmake -DPYBIND11_PYTHON_VERSION=3.6 ..

    # Another method:
    cmake -DPYTHON_EXECUTABLE=/path/to/python ..

    # This often is a good way to get the current Python, works in environments:
    cmake -DPYTHON_EXECUTABLE=$(python3 -c "import sys; print(sys.executable)") ..

find_package vs. add_subdirectory
---------------------------------

For CMake-based projects that don't include the pybind11 repository internally,
an external installation can be detected through ``find_package(pybind11)``.
See the `Config file`_ docstring for details of relevant CMake variables.

.. code-block:: cmake

    cmake_minimum_required(VERSION 3.4...3.18)
    project(example LANGUAGES CXX)

    find_package(pybind11 REQUIRED)
    pybind11_add_module(example example.cpp)

Note that ``find_package(pybind11)`` will only work correctly if pybind11
has been correctly installed on the system, e.g. after downloading or cloning
the pybind11 repository:

.. code-block:: bash

    # Classic CMake
    cd pybind11
    mkdir build
    cd build
    cmake ..
    make install

    # CMake 3.15+
    cd pybind11
    cmake -S . -B build
    cmake --build build -j 2  # Build on 2 cores
    cmake --install build

Once detected, the aforementioned ``pybind11_add_module`` can be employed as
before. The function usage and configuration variables are identical no matter
if pybind11 is added as a subdirectory or found as an installed package. You
can refer to the same [cmake_example]_ repository for a full sample project
-- just swap out ``add_subdirectory`` for ``find_package``.

.. _Config file: https://github.com/pybind/pybind11/blob/master/tools/pybind11Config.cmake.in

.. _find-python-mode:

FindPython mode
---------------

CMake 3.12+ (3.15+ recommended, 3.18.2+ ideal) added a new module called
FindPython that has a highly improved search algorithm and modern targets
and tools. If you use FindPython, pybind11 will detect this and use the
existing targets instead:

.. code-block:: cmake

    cmake_minimum_required(VERSION 3.15...3.22)
    project(example LANGUAGES CXX)

    find_package(Python 3.6 COMPONENTS Interpreter Development REQUIRED)
    find_package(pybind11 CONFIG REQUIRED)
    # or add_subdirectory(pybind11)

    pybind11_add_module(example example.cpp)

You can also use the targets (as listed below) with FindPython. If you define
``PYBIND11_FINDPYTHON``, pybind11 will perform the FindPython step for you
(mostly useful when building pybind11's own tests, or as a way to change search
algorithms from the CMake invocation, with ``-DPYBIND11_FINDPYTHON=ON``).

.. warning::

    If you use FindPython to multi-target Python versions, use the individual
    targets listed below, and avoid targets that directly include Python parts.

There are `many ways to hint or force a discovery of a specific Python
installation <https://cmake.org/cmake/help/latest/module/FindPython.html>`_;
setting ``Python_ROOT_DIR`` may be the most common one (though with
virtualenv/venv support, and Conda support, this tends to find the correct
Python version more often than the old system did).

.. warning::

    When the Python libraries (i.e. ``libpythonXX.a`` and ``libpythonXX.so``
    on Unix) are not available, as is the case on a manylinux image, the
    ``Development`` component will not be resolved by ``FindPython``. When not
    using the embedding functionality, CMake 3.18+ allows you to specify
    ``Development.Module`` instead of ``Development`` to resolve this issue.

.. versionadded:: 2.6

Advanced: interface library targets
-----------------------------------

Pybind11 supports modern CMake usage patterns with a set of interface targets,
available in all modes. The targets provided are:

    ``pybind11::headers``
        Just the pybind11 headers and minimum compile requirements

    ``pybind11::pybind11``
        Python headers + ``pybind11::headers``

    ``pybind11::python_link_helper``
        Just the "linking" part of ``pybind11::module``

    ``pybind11::module``
        Everything for extension modules -- ``pybind11::pybind11`` +
        ``Python::Module`` (FindPython CMake 3.15+) or
        ``pybind11::python_link_helper``

    ``pybind11::embed``
        Everything for embedding the Python interpreter -- ``pybind11::pybind11``
        + ``Python::Python`` (FindPython) or Python libs

    ``pybind11::lto`` / ``pybind11::thin_lto``
        An alternative to ``INTERPROCEDURAL_OPTIMIZATION`` for adding link-time
        optimization.

    ``pybind11::windows_extras``
        ``/bigobj`` and ``/mp`` for MSVC.

    ``pybind11::opt_size``
        ``/Os`` for MSVC, ``-Os`` for other compilers. Does nothing for debug
        builds.

Two helper functions are also provided:

    ``pybind11_strip(target)``
        Strips a target (uses ``CMAKE_STRIP`` after the target is built)

    ``pybind11_extension(target)``
        Sets the correct extension (with SOABI) for a target.

You can use these targets to build complex applications. For example, the
``pybind11_add_module`` function is identical to:

.. code-block:: cmake

    cmake_minimum_required(VERSION 3.4)
    project(example LANGUAGES CXX)

    find_package(pybind11 REQUIRED)  # or add_subdirectory(pybind11)

    add_library(example MODULE main.cpp)

    target_link_libraries(example PRIVATE pybind11::module pybind11::lto pybind11::windows_extras)

    pybind11_extension(example)
    if(NOT MSVC AND NOT ${CMAKE_BUILD_TYPE} MATCHES Debug|RelWithDebInfo)
        # Strip unnecessary sections of the binary on Linux/macOS
        pybind11_strip(example)
    endif()

    set_target_properties(example PROPERTIES CXX_VISIBILITY_PRESET "hidden"
                                             CUDA_VISIBILITY_PRESET "hidden")

Instead of setting properties, you can set ``CMAKE_*`` variables to initialize
these correctly.

.. warning::

    Since pybind11 is a metatemplate library, it is crucial that certain
    compiler flags are provided to ensure high quality code generation. In
    contrast to the ``pybind11_add_module()`` command, the CMake interface
    provides a *composable* set of targets to ensure that you retain
    flexibility. It can be especially important to provide or set these
    properties; the :ref:`FAQ <faq:hidden_visibility>` contains an explanation
    on why these are needed.

.. versionadded:: 2.6

.. _nopython-mode:

Advanced: NOPYTHON mode
-----------------------

If you want complete control, you can set ``PYBIND11_NOPYTHON`` to completely
disable Python integration (this also happens if you run ``FindPython2`` and
``FindPython3`` without running ``FindPython``). This gives you complete
freedom to integrate into an existing system (like `Scikit-Build's
<https://scikit-build.readthedocs.io>`_ ``PythonExtensions``).
``pybind11_add_module`` and ``pybind11_extension`` will be unavailable, and the
targets will be missing any Python-specific behavior.

.. versionadded:: 2.6

Embedding the Python interpreter
--------------------------------

In addition to extension modules, pybind11 also supports embedding Python into
a C++ executable or library. In CMake, simply link with the ``pybind11::embed``
target. It provides everything needed to get the interpreter running. The
Python headers and libraries are attached to the target. Unlike
``pybind11::module``, there is no need to manually set any additional
properties here. For more information about usage in C++, see
:doc:`/advanced/embedding`.

.. code-block:: cmake

    cmake_minimum_required(VERSION 3.4...3.18)
    project(example LANGUAGES CXX)

    find_package(pybind11 REQUIRED)  # or add_subdirectory(pybind11)

    add_executable(example main.cpp)
    target_link_libraries(example PRIVATE pybind11::embed)

.. _building_manually:

Building manually
=================

pybind11 is a header-only library; hence it is not necessary to link against
any special libraries and there are no intermediate (magic) translation steps.

On Linux, you can compile an example such as the one given in
:ref:`simple_example` using the following command:

.. code-block:: bash

    $ c++ -O3 -Wall -shared -std=c++11 -fPIC $(python3 -m pybind11 --includes) example.cpp -o example$(python3-config --extension-suffix)

The ``python3 -m pybind11 --includes`` command fetches the include paths for
both pybind11 and Python headers. This assumes that pybind11 has been installed
using ``pip`` or ``conda``. If it hasn't, you can also manually specify
``-I <path-to-pybind11>/include`` together with the Python includes path
``python3-config --includes``.
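
If you need the same include path from Python code, e.g. in a custom build
script, the installed ``pybind11`` package also exposes it programmatically; a
minimal sketch:

.. code-block:: python

    import pybind11

    # Directory containing the pybind11 headers; pass it to a compiler as
    # -I<dir> (this is also what `python3 -m pybind11 --includes` reports).
    print(pybind11.get_include())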
On macOS: the build command is almost the same but it also requires passing
the ``-undefined dynamic_lookup`` flag so as to ignore missing symbols when
building the module:

.. code-block:: bash

    $ c++ -O3 -Wall -shared -std=c++11 -undefined dynamic_lookup $(python3 -m pybind11 --includes) example.cpp -o example$(python3-config --extension-suffix)

In general, it is advisable to include several additional build parameters
that can considerably reduce the size of the created binary. Refer to section
:ref:`cmake` for a detailed example of a suitable cross-platform CMake-based
build system that works on all platforms including Windows.

.. note::

    On Linux and macOS, it's better to (intentionally) not link against
    ``libpython``. The symbols will be resolved when the extension library
    is loaded into a Python binary. This is preferable because you might
    have several different installations of a given Python version (e.g. the
    system-provided Python, and one that ships with a piece of commercial
    software). In this way, the plugin will work with both versions, instead
    of possibly importing a second Python library into a process that already
    contains one (which will lead to a segfault).

Building with Bazel
===================

You can build with the Bazel build system using the `pybind11_bazel
<https://github.com/pybind/pybind11_bazel>`_ repository.

Generating binding code automatically
=====================================

The ``Binder`` project is a tool for automatic generation of pybind11 binding
code by introspecting existing C++ codebases using LLVM/Clang. See the
[binder]_ documentation for details.

.. [binder] http://cppbinder.readthedocs.io/en/latest/about.html

[AutoWIG]_ is a Python library that wraps automatically compiled libraries into
high-level languages. It parses C++ code using LLVM/Clang technologies and
generates the wrappers using the Mako templating engine. The approach is
automatic, extensible, and applies to very complex C++ libraries, composed of
thousands of classes or incorporating modern meta-programming constructs.

.. [AutoWIG] https://github.com/StatisKit/AutoWIG

[robotpy-build]_ is a pure Python, cross-platform build tool that aims to
simplify creation of Python wheels for pybind11 projects, and provide
cross-project dependency management. Additionally, it is able to autogenerate
customizable pybind11-based wrappers by parsing C++ header files.

.. [robotpy-build] https://robotpy-build.readthedocs.io

diff --git a/third_party/pybind11/docs/conf.py b/third_party/pybind11/docs/conf.py new file mode 100644 index 0000000000..8928fd9160 --- /dev/null +++ b/third_party/pybind11/docs/conf.py @@ -0,0 +1,376 @@

#!/usr/bin/env python3
#
# pybind11 documentation build configuration file, created by
# sphinx-quickstart on Sun Oct 11 19:23:48 2015.
#
# This file is execfile()d with the current directory set to its
# containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.

import os
import re
import subprocess
import sys
from pathlib import Path

DIR = Path(__file__).parent.resolve()

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
# sys.path.insert(0, os.path.abspath('.'))

# -- General configuration ------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
# needs_sphinx = '1.0'

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    "breathe",
    "sphinxcontrib.rsvgconverter",
    "sphinxcontrib.moderncmakedomain",
]

breathe_projects = {"pybind11": ".build/doxygenxml/"}
breathe_default_project = "pybind11"
breathe_domain_by_extension = {"h": "cpp"}

# Add any paths that contain templates here, relative to this directory.
templates_path = [".templates"]

# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
# source_suffix = ['.rst', '.md']
source_suffix = ".rst"

# The encoding of source files.
# source_encoding = 'utf-8-sig'

# The master toctree document.
master_doc = "index"

# General information about the project.
project = "pybind11"
copyright = "2017, Wenzel Jakob"
author = "Wenzel Jakob"

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.

# Read the listed version
with open("../pybind11/_version.py") as f:
    code = compile(f.read(), "../pybind11/_version.py", "exec")
loc = {}
exec(code, loc)

# The full version, including alpha/beta/rc tags.
version = loc["__version__"]

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
language = None

# There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used:
# today = ''
# Else, today_fmt is used as the format for a strftime call.
# today_fmt = '%B %d, %Y'

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
exclude_patterns = [".build", "release.rst"]

# The reST default role (used for this markup: `text`) to use for all
# documents.
default_role = "any"

# If true, '()' will be appended to :func: etc. cross-reference text.
# add_function_parentheses = True

# If true, the current module name will be prepended to all description
# unit titles (such as .. function::).
# add_module_names = True

# If true, sectionauthor and moduleauthor directives will be shown in the
# output. They are ignored by default.
# show_authors = False

# The name of the Pygments (syntax highlighting) style to use.
# pygments_style = 'monokai'

# A list of ignored prefixes for module index sorting.
# modindex_common_prefix = []

# If true, keep warnings as "system message" paragraphs in the built documents.
# keep_warnings = False

# If true, `todo` and `todoList` produce output, else they produce nothing.
todo_include_todos = False

# -- Options for HTML output ----------------------------------------------

# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
on_rtd = os.environ.get("READTHEDOCS", None) == "True"

if not on_rtd:  # only import and set the theme if we're building docs locally
    import sphinx_rtd_theme

    html_theme = "sphinx_rtd_theme"
    html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]

    html_context = {"css_files": ["_static/theme_overrides.css"]}
else:
    html_context = {
        "css_files": [
            "//media.readthedocs.org/css/sphinx_rtd_theme.css",
            "//media.readthedocs.org/css/readthedocs-doc-embed.css",
            "_static/theme_overrides.css",
        ]
    }

# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
# html_theme_options = {}

# Add any paths that contain custom themes here, relative to this directory.
# html_theme_path = []

# The name for this set of Sphinx documents. If None, it defaults to
# "<project> v<release> documentation".
# html_title = None

# A shorter title for the navigation bar. Default is the same as html_title.
# html_short_title = None

# The name of an image file (relative to this directory) to place at the top
# of the sidebar.
# html_logo = None

# The name of an image file (within the static path) to use as favicon of the
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
# pixels large.
# html_favicon = None

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ["_static"]

# Add any extra paths that contain custom files (such as robots.txt or
# .htaccess) here, relative to this directory. These files are copied
# directly to the root of the documentation.
# html_extra_path = []

# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
# using the given strftime format.
# html_last_updated_fmt = '%b %d, %Y'

# If true, SmartyPants will be used to convert quotes and dashes to
# typographically correct entities.
# html_use_smartypants = True

# Custom sidebar templates, maps document names to template names.
# html_sidebars = {}

# Additional templates that should be rendered to pages, maps page names to
# template names.
# html_additional_pages = {}

# If false, no module index is generated.
# html_domain_indices = True

# If false, no index is generated.
# html_use_index = True

# If true, the index is split into individual pages for each letter.
# html_split_index = False

# If true, links to the reST sources are added to the pages.
# html_show_sourcelink = True

# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
# html_show_sphinx = True

# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
# html_show_copyright = True

# If true, an OpenSearch description file will be output, and all pages will
# contain a <link> tag referring to it. The value of this option must be the
# base URL from which the finished HTML is served.
# html_use_opensearch = ''

# This is the file name suffix for HTML files (e.g. ".xhtml").
# html_file_suffix = None

# Language to be used for generating the HTML full-text search index.
# Sphinx supports the following languages:
# 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja'
# 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr'
# html_search_language = 'en'

# A dictionary with options for the search language support, empty by default.
# Now only 'ja' uses this config value
# html_search_options = {'type': 'default'}

# The name of a javascript file (relative to the configuration directory) that
# implements a search results scorer. If empty, the default will be used.
# html_search_scorer = 'scorer.js'

# Output file base name for HTML help builder.
htmlhelp_basename = "pybind11doc"

# -- Options for LaTeX output ---------------------------------------------

latex_engine = "pdflatex"

latex_elements = {
    # The paper size ('letterpaper' or 'a4paper').
    # 'papersize': 'letterpaper',
    #
    # The font size ('10pt', '11pt' or '12pt').
    # 'pointsize': '10pt',
    #
    # Additional stuff for the LaTeX preamble.
    # remove blank pages (between the title page and the TOC, etc.)
    "classoptions": ",openany,oneside",
    "preamble": r"""
\usepackage{fontawesome}
\usepackage{textgreek}
\DeclareUnicodeCharacter{00A0}{}
\DeclareUnicodeCharacter{2194}{\faArrowsH}
\DeclareUnicodeCharacter{1F382}{\faBirthdayCake}
\DeclareUnicodeCharacter{1F355}{\faAdjust}
\DeclareUnicodeCharacter{0301}{'}
\DeclareUnicodeCharacter{03C0}{\textpi}

""",
    # Latex figure (float) alignment
    # 'figure_align': 'htbp',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
#  author, documentclass [howto, manual, or own class]).
latex_documents = [
    (master_doc, "pybind11.tex", "pybind11 Documentation", "Wenzel Jakob", "manual"),
]

# The name of an image file (relative to this directory) to place at the top of
# the title page.
# latex_logo = 'pybind11-logo.png'

# For "manual" documents, if this is true, then toplevel headings are parts,
# not chapters.
# latex_use_parts = False

# If true, show page references after internal links.
# latex_show_pagerefs = False

# If true, show URL addresses after external links.
# latex_show_urls = False

# Documents to append as an appendix to all manuals.
# latex_appendices = []

# If false, no module index is generated.
# latex_domain_indices = True

# -- Options for manual page output ---------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [(master_doc, "pybind11", "pybind11 Documentation", [author], 1)]

# If true, show URL addresses after external links.
# man_show_urls = False

# -- Options for Texinfo output -------------------------------------------

# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
#  dir menu entry, description, category)
texinfo_documents = [
    (
        master_doc,
        "pybind11",
        "pybind11 Documentation",
        author,
        "pybind11",
        "One line description of project.",
        "Miscellaneous",
    ),
]

# Documents to append as an appendix to all manuals.
# texinfo_appendices = []

# If false, no module index is generated.
# texinfo_domain_indices = True

# How to display URL addresses: 'footnote', 'no', or 'inline'.
# texinfo_show_urls = 'footnote'

# If true, do not generate a @detailmenu in the "Top" node's menu.
# texinfo_no_detailmenu = False

primary_domain = "cpp"
highlight_language = "cpp"


def generate_doxygen_xml(app):
    build_dir = os.path.join(app.confdir, ".build")
    if not os.path.exists(build_dir):
        os.mkdir(build_dir)

    try:
        subprocess.call(["doxygen", "--version"])
        retcode = subprocess.call(["doxygen"], cwd=app.confdir)
        if retcode < 0:
            sys.stderr.write(f"doxygen error code: {-retcode}\n")
    except OSError as e:
        sys.stderr.write(f"doxygen execution failed: {e}\n")


def prepare(app):
    with open(DIR.parent / "README.rst") as f:
        contents = f.read()

    if app.builder.name == "latex":
        # Remove badges and stuff from start
        contents = contents[contents.find(r".. start") :]

        # Filter out section titles for index.rst for LaTeX
        contents = re.sub(
            r"^(.*)\n[-~]{3,}$",
            r"**\1**",
            contents,
            flags=re.MULTILINE,
        )

    with open(DIR / "readme.rst", "w") as f:
        f.write(contents)


def clean_up(app, exception):
    (DIR / "readme.rst").unlink()


def setup(app):

    # Add hook for building doxygen xml when needed
    app.connect("builder-inited", generate_doxygen_xml)

    # Copy the readme in
    app.connect("builder-inited", prepare)

    # Clean up the generated readme
    app.connect("build-finished", clean_up)

diff --git a/third_party/pybind11/docs/faq.rst b/third_party/pybind11/docs/faq.rst new file mode 100644 index 0000000000..28498e7dfc --- /dev/null +++ b/third_party/pybind11/docs/faq.rst @@ -0,0 +1,307 @@

Frequently asked questions
##########################

"ImportError: dynamic module does not define init function"
===========================================================

1. Make sure that the name specified in PYBIND11_MODULE is identical to the
   filename of the extension library (without suffixes such as ``.so``).

2. If the above did not fix the issue, you are likely using an incompatible
   version of Python that does not match what you compiled with.

"Symbol not found: ``__Py_ZeroStruct`` / ``_PyInstanceMethod_Type``"
====================================================================

See the first answer.

"SystemError: dynamic module not initialized properly"
======================================================

See the first answer.

The Python interpreter immediately crashes when importing my module
===================================================================

See the first answer.

.. _faq_reference_arguments:

Limitations involving reference arguments
=========================================

In C++, it's fairly common to pass arguments using mutable references or
mutable pointers, which allows both read and write access to the value
supplied by the caller. This is sometimes done for efficiency reasons, or to
realize functions that have multiple return values. Here are two very basic
examples:

.. code-block:: cpp

    void increment(int &i) { i++; }
    void increment_ptr(int *i) { (*i)++; }

In Python, all arguments are passed by reference, so there is no general
issue in binding such code from Python.

However, certain basic Python types (like ``str``, ``int``, ``bool``,
``float``, etc.) are **immutable**. This means that the following attempt
to port the function to Python doesn't have the same effect on the value
provided by the caller -- in fact, it does nothing at all.

.. code-block:: python

    def increment(i):
        i += 1  # nope..
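
Calling the pure-Python port makes the failure obvious; a short illustrative
session (hypothetical, using the ``increment`` definition above):

.. code-block:: pycon

    >>> i = 5
    >>> increment(i)  # rebinds the local name inside the function only
    >>> i             # the caller's value is unchanged
    5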
pybind11 is also affected by such language-level conventions, which means that
binding ``increment`` or ``increment_ptr`` will also create Python functions
that don't modify their arguments.

Although inconvenient, one workaround is to encapsulate the immutable types in
a custom type that does allow modifications.

Another alternative involves binding a small wrapper lambda function that
returns a tuple with all output arguments (see the remainder of the
documentation for examples on binding lambda functions). An example:

.. code-block:: cpp

    int foo(int &i) { i++; return 123; }

and the binding code

.. code-block:: cpp

    m.def("foo", [](int i) { int rv = foo(i); return std::make_tuple(rv, i); });

How can I reduce the build time?
================================

It's good practice to split binding code over multiple files, as in the
following example:

:file:`example.cpp`:

.. code-block:: cpp

    void init_ex1(py::module_ &);
    void init_ex2(py::module_ &);
    /* ... */

    PYBIND11_MODULE(example, m) {
        init_ex1(m);
        init_ex2(m);
        /* ... */
    }

:file:`ex1.cpp`:

.. code-block:: cpp

    void init_ex1(py::module_ &m) {
        m.def("add", [](int a, int b) { return a + b; });
    }

:file:`ex2.cpp`:

.. code-block:: cpp

    void init_ex2(py::module_ &m) {
        m.def("sub", [](int a, int b) { return a - b; });
    }

:command:`python`:

.. code-block:: pycon

    >>> import example
    >>> example.add(1, 2)
    3
    >>> example.sub(1, 1)
    0

As shown above, the various ``init_ex`` functions should be contained in
separate files that can be compiled independently from one another, and then
linked together into the same final shared object. Following this approach
will:

1. reduce memory requirements per compilation unit.

2. enable parallel builds (if desired).

3. allow for faster incremental builds. For instance, when a single class
   definition is changed, only a subset of the binding code will generally need
   to be recompiled.

"recursive template instantiation exceeded maximum depth of 256"
================================================================

If you receive an error about excessive recursive template evaluation, try
specifying a larger value, e.g. ``-ftemplate-depth=1024`` on GCC/Clang. The
culprit is generally the generation of function signatures at compile time
using C++14 template metaprogramming.

.. _`faq:hidden_visibility`:

"'SomeClass' declared with greater visibility than the type of its field 'SomeClass::member' [-Wattributes]"
============================================================================================================

This error typically indicates that you are compiling without the required
``-fvisibility`` flag. pybind11 code internally forces hidden visibility on
all internal code, but if non-hidden (and thus *exported*) code attempts to
include a pybind type (for example, ``py::object`` or ``py::list``) you can run
into this warning.

To avoid it, make sure you are specifying ``-fvisibility=hidden`` when
compiling pybind code.

As to why ``-fvisibility=hidden`` is necessary: because pybind modules could
have been compiled under different versions of pybind itself, it is also
important that the symbols defined in one module do not clash with the
potentially-incompatible symbols defined in another.
While Python extension
modules are usually loaded with localized symbols (under POSIX systems
typically using ``dlopen`` with the ``RTLD_LOCAL`` flag), this Python default
can be changed, and even when it isn't, it is not always enough to guarantee
complete independence of the symbols involved when not using
``-fvisibility=hidden``.

Additionally, ``-fvisibility=hidden`` can deliver considerable binary size
savings. (See the following section for more details.)

.. _`faq:symhidden`:

How can I create smaller binaries?
==================================

To do its job, pybind11 extensively relies on a programming technique known as
*template metaprogramming*, which is a way of performing computation at compile
time using type information. Template metaprogramming usually instantiates code
involving significant numbers of deeply nested types that are either completely
removed or reduced to just a few instructions during the compiler's optimization
phase. However, due to the nested nature of these types, the resulting symbol
names in the compiled extension library can be extremely long. For instance,
the included test suite contains the following symbol:

.. only:: html

    .. code-block:: none

        _​_​Z​N​8​p​y​b​i​n​d​1​1​1​2​c​p​p​_​f​u​n​c​t​i​o​n​C​1​I​v​8​E​x​a​m​p​l​e​2​J​R​N​S​t​3​_​_​1​6​v​e​c​t​o​r​I​N​S​3​_​1​2​b​a​s​i​c​_​s​t​r​i​n​g​I​w​N​S​3​_​1​1​c​h​a​r​_​t​r​a​i​t​s​I​w​E​E​N​S​3​_​9​a​l​l​o​c​a​t​o​r​I​w​E​E​E​E​N​S​8​_​I​S​A​_​E​E​E​E​E​J​N​S​_​4​n​a​m​e​E​N​S​_​7​s​i​b​l​i​n​g​E​N​S​_​9​i​s​_​m​e​t​h​o​d​E​A​2​8​_​c​E​E​E​M​T​0​_​F​T​_​D​p​T​1​_​E​D​p​R​K​T​2​_

.. only:: not html

    .. code-block:: cpp

        __ZN8pybind1112cpp_functionC1Iv8Example2JRNSt3__16vectorINS3_12basic_stringIwNS3_11char_traitsIwEENS3_9allocatorIwEEEENS8_ISA_EEEEEJNS_4nameENS_7siblingENS_9is_methodEA28_cEEEMT0_FT_DpT1_EDpRKT2_

which is the mangled form of the following function type:

.. code-block:: cpp

    pybind11::cpp_function::cpp_function<void, Example2, std::__1::vector<std::__1::basic_string<wchar_t, std::__1::char_traits<wchar_t>, std::__1::allocator<wchar_t> >, std::__1::allocator<std::__1::basic_string<wchar_t, std::__1::char_traits<wchar_t>, std::__1::allocator<wchar_t> > > >&, pybind11::name, pybind11::sibling, pybind11::is_method, char [28]>(void (Example2::*)(std::__1::vector<std::__1::basic_string<wchar_t, std::__1::char_traits<wchar_t>, std::__1::allocator<wchar_t> >, std::__1::allocator<std::__1::basic_string<wchar_t, std::__1::char_traits<wchar_t>, std::__1::allocator<wchar_t> > > >&), pybind11::name const&, pybind11::sibling const&, pybind11::is_method const&, char const (&) [28])

The memory needed to store just the mangled name of this function (196 bytes)
is larger than the actual piece of code (111 bytes) it represents! On the other
hand, it's silly to even give this function a name -- after all, it's just a
tiny cog in a bigger piece of machinery that is not exposed to the outside
world. So we'll generally only want to export symbols for those functions which
are actually called from the outside.

This can be achieved by specifying the parameter ``-fvisibility=hidden`` to GCC
and Clang, which sets the default symbol visibility to *hidden*. This has a
tremendous impact on the final binary size of the resulting extension library.
(On Visual Studio, symbols are already hidden by default, so nothing needs to
be done there.)

In addition to decreasing binary size, ``-fvisibility=hidden`` also avoids
potential serious issues when loading multiple modules and is required for
proper pybind operation. See the previous FAQ entry for more details.

How can I properly handle Ctrl-C in long-running functions?
===========================================================

Ctrl-C is received by the Python interpreter, which holds it until the GIL
is released, so a long-running function won't be interrupted.

To interrupt from inside your function, you can use the ``PyErr_CheckSignals()``
function, which tells you whether a signal has been raised on the Python side.
This function merely checks a flag, so its impact is negligible. When a signal
has been received, you must either explicitly interrupt execution by throwing
``py::error_already_set`` (which will propagate the existing
``KeyboardInterrupt``), or clear the error (which you usually will not want):

.. code-block:: cpp

    PYBIND11_MODULE(example, m)
    {
        m.def("long_running_func", []()
        {
            for (;;) {
                if (PyErr_CheckSignals() != 0)
                    throw py::error_already_set();
                // Long running iteration
            }
        });
    }

CMake doesn't detect the right Python version
=============================================

The CMake-based build system will try to automatically detect the installed
version of Python and link against that. When this fails, or when there are
multiple versions of Python and it finds the wrong one, delete
``CMakeCache.txt`` and then add ``-DPYTHON_EXECUTABLE=$(which python)`` to your
CMake configure line. (Replace ``$(which python)`` with a path to Python if
you prefer.)

You can alternatively try ``-DPYBIND11_FINDPYTHON=ON``, which will activate the
new CMake FindPython support instead of pybind11's custom search. Requires
CMake 3.12+, and 3.15+ or 3.18.2+ are even better. You can set this in your
``CMakeLists.txt`` before adding or finding pybind11, as well.

Inconsistent detection of Python version in CMake and pybind11
==============================================================

The functions ``find_package(PythonInterp)`` and ``find_package(PythonLibs)``
provided by CMake for Python version detection are modified by pybind11 due to
unreliability and limitations that make them unsuitable for pybind11's needs.
Instead, pybind11 provides its own, more reliable Python detection CMake code.
Conflicts can arise, however, when using pybind11 in a project that *also* uses
the CMake Python detection in a system with several Python versions installed.

This difference may cause inconsistencies and errors if *both* mechanisms are
used in the same project.

There are three possible solutions:

1. Avoid using ``find_package(PythonInterp)`` and ``find_package(PythonLibs)``
   from CMake and rely on pybind11 to detect the Python version. If this is not
   possible, the CMake machinery should be called *before* including pybind11.
2. Set ``PYBIND11_FINDPYTHON`` to ``True`` or use ``find_package(Python
   COMPONENTS Interpreter Development)`` on modern CMake (3.12+, 3.15+ better,
   3.18.2+ best). Pybind11 in these cases uses the new CMake FindPython instead
   of the old, deprecated search tools, and these modules are much better at
   finding the correct Python.
3. Set ``PYBIND11_NOPYTHON`` to ``TRUE``. Pybind11 will not search for Python.
   However, you will have to use the target-based system, and do more setup
   yourself, because it does not know about or include things that depend on
   Python, like ``pybind11_add_module``. This might be ideal for integrating
   into an existing system, like scikit-build's Python helpers.

How to cite this project?
=========================

We suggest the following BibTeX template to cite pybind11 in scientific
discourse:

.. code-block:: bibtex

    @misc{pybind11,
        author = {Wenzel Jakob and Jason Rhinelander and Dean Moldovan},
        year = {2017},
        note = {https://github.com/pybind/pybind11},
        title = {pybind11 -- Seamless operability between C++11 and Python}
    }

diff --git a/third_party/pybind11/docs/index.rst b/third_party/pybind11/docs/index.rst new file mode 100644 index 0000000000..4e2e8ca3a0 --- /dev/null +++ b/third_party/pybind11/docs/index.rst @@ -0,0 +1,48 @@

.. only:: latex

    Intro
    =====

.. include:: readme.rst

.. only:: not latex

    Contents:

.. toctree::
    :maxdepth: 1

    changelog
    upgrade

.. toctree::
    :caption: The Basics
    :maxdepth: 2

    installing
    basics
    classes
    compiling

.. toctree::
    :caption: Advanced Topics
    :maxdepth: 2

    advanced/functions
    advanced/classes
    advanced/exceptions
    advanced/smart_ptrs
    advanced/cast/index
    advanced/pycpp/index
    advanced/embedding
    advanced/misc

.. toctree::
    :caption: Extra Information
    :maxdepth: 1

    faq
    benchmark
    limitations
    reference
    cmake/index

diff --git a/third_party/pybind11/docs/installing.rst b/third_party/pybind11/docs/installing.rst new file mode 100644 index 0000000000..30b9f1853d --- /dev/null +++ b/third_party/pybind11/docs/installing.rst @@ -0,0 +1,105 @@

.. _installing:

Installing the library
######################

There are several ways to get the pybind11 source, which lives at
`pybind/pybind11 on GitHub <https://github.com/pybind/pybind11>`_. The pybind11
developers recommend one of the first three ways listed here, submodule, PyPI,
or conda-forge, for obtaining pybind11.

.. _include_as_a_submodule:

Include as a submodule
======================

When you are working on a project in Git, you can use the pybind11 repository
as a submodule. From your git repository, use:

.. code-block:: bash

    git submodule add -b stable ../../pybind/pybind11 extern/pybind11
    git submodule update --init

This assumes you are placing your dependencies in ``extern/``, and that you are
using GitHub; if you are not using GitHub, use the full https or ssh URL
instead of the relative URL ``../../pybind/pybind11`` above. Some other servers
also require the ``.git`` extension (GitHub does not).

From here, you can now include ``extern/pybind11/include``, or you can use
the various integration tools (see :ref:`compiling`) pybind11 provides directly
from the local folder.

Include with PyPI
=================

You can download the sources and CMake files as a Python package from PyPI
using Pip. Just use:

.. code-block:: bash

    pip install pybind11

This will provide pybind11 in a standard Python package format. If you want
pybind11 available directly in your environment root, you can use:

.. code-block:: bash

    pip install "pybind11[global]"

This is not recommended if you are installing with your system Python, as it
will add files to ``/usr/local/include/pybind11`` and
``/usr/local/share/cmake/pybind11``, so unless that is what you want, it is
recommended only for use in virtual environments or your ``pyproject.toml``
file (see :ref:`compiling`).

Include with conda-forge
========================

You can use pybind11 with conda packaging via `conda-forge
<https://github.com/conda-forge/pybind11-feedstock>`_:

.. code-block:: bash

    conda install -c conda-forge pybind11

Include with vcpkg
==================

You can download and install pybind11 using the Microsoft `vcpkg
<https://github.com/Microsoft/vcpkg>`_ dependency manager:

.. code-block:: bash

    git clone https://github.com/Microsoft/vcpkg.git
    cd vcpkg
    ./bootstrap-vcpkg.sh
    ./vcpkg integrate install
    vcpkg install pybind11

The pybind11 port in vcpkg is kept up to date by Microsoft team members and
community contributors. If the version is out of date, please `create an issue
or pull request <https://github.com/Microsoft/vcpkg>`_ on the vcpkg
repository.

Global install with brew
========================

The brew package manager (Homebrew on macOS, or Linuxbrew on Linux) has a
`pybind11 package <https://formulae.brew.sh/formula/pybind11>`_.
To install:

.. code-block:: bash

    brew install pybind11

.. We should list Conan, and possibly a few other C++ package managers (hunter,
.. perhaps). Conan has a very clean CMake integration that would be good to show.

Other options
=============

Other locations you can find pybind11 are `listed here
<https://repology.org/project/python:pybind11/versions>`_; these are maintained
by various packagers and the community.

diff --git a/third_party/pybind11/docs/limitations.rst b/third_party/pybind11/docs/limitations.rst new file mode 100644 index 0000000000..def5ad659c --- /dev/null +++ b/third_party/pybind11/docs/limitations.rst @@ -0,0 +1,72 @@

Limitations
###########

Design choices
^^^^^^^^^^^^^^

pybind11 strives to be a general solution to binding generation, but it also
has certain limitations:

- pybind11 casts away ``const``-ness in function arguments and return values.
  This is in line with the Python language, which has no concept of ``const``
  values. This means that some additional care is needed to avoid bugs that
  would be caught by the type checker in a traditional C++ program.

- The NumPy interface ``pybind11::array`` greatly simplifies accessing
  numerical data from C++ (and vice versa), but it's not a full-blown array
  class like ``Eigen::Array`` or ``boost.multi_array``. ``Eigen`` objects are
  directly supported, however, with ``pybind11/eigen.h``.

Large but useful features could be implemented in pybind11 but would lead to a
significant increase in complexity. Pybind11 strives to be simple and compact.
Users who require large new features are encouraged to write an extension to
pybind11; see `pybind11_json <https://github.com/pybind/pybind11_json>`_ for an
example.

Known bugs
^^^^^^^^^^

These are issues that hopefully will one day be fixed, but currently are
unsolved. If you know how to help with one of these issues, contributions
are welcome!

- Intel 20.2 is currently having an issue with the test suite.
  `#2573 <https://github.com/pybind/pybind11/issues/2573>`_

- Debug mode Python does not support 1-5 tests in the test suite currently.
  `#2422 <https://github.com/pybind/pybind11/issues/2422>`_

- PyPy3 7.3.1 and 7.3.2 have issues with several tests on 32-bit Windows.

Known limitations
^^^^^^^^^^^^^^^^^

These are issues that are probably solvable, but have not been fixed yet. A
clean, well written patch would likely be accepted to solve them.

- Type casters are not kept alive recursively.
  `#2527 <https://github.com/pybind/pybind11/issues/2527>`_
  One consequence is that containers of ``char *`` are currently not supported.
  `#2245 <https://github.com/pybind/pybind11/issues/2245>`_

- The ``cpptest`` does not run on Windows with Python 3.8 or newer, due to DLL
  loader changes. User code that is correctly installed should not be affected.
  `#2560 <https://github.com/pybind/pybind11/issues/2560>`_

Python 3.9.0 warning
^^^^^^^^^^^^^^^^^^^^

Combining older versions of pybind11 (< 2.6.0) with Python on exactly 3.9.0
will trigger undefined behavior that typically manifests as crashes during
interpreter shutdown (but could also destroy your data. **You have been
warned**).

This issue was fixed upstream in Python 3.9.1.
As a mitigation for this bug, pybind11 2.6.0 or newer includes a workaround
specifically when Python 3.9.0 is detected at runtime, leaking about 50 bytes
of memory when a callback function is garbage collected. For reference, the
pybind11 test suite has about 2,000 such callbacks, but only 49 are garbage
collected before the end-of-process. Wheels (even if built with Python 3.9.0)
will correctly avoid the leak when run in Python 3.9.1, and this does not
affect other 3.X versions.

diff --git a/third_party/pybind11/docs/pybind11-logo.png b/third_party/pybind11/docs/pybind11-logo.png new file mode 100644 index 0000000000..4cbad54f79 Binary files /dev/null and b/third_party/pybind11/docs/pybind11-logo.png differ

diff --git a/third_party/pybind11/docs/pybind11_vs_boost_python1.png b/third_party/pybind11/docs/pybind11_vs_boost_python1.png new file mode 100644 index 0000000000..833231f240 Binary files /dev/null and b/third_party/pybind11/docs/pybind11_vs_boost_python1.png differ

diff --git a/third_party/pybind11/docs/pybind11_vs_boost_python1.svg b/third_party/pybind11/docs/pybind11_vs_boost_python1.svg new file mode 100644 index 0000000000..5bf950e6fd --- /dev/null +++ b/third_party/pybind11/docs/pybind11_vs_boost_python1.svg @@ -0,0 +1,427 @@ (427 lines of SVG markup omitted: pybind11 vs. Boost.Python benchmark plot)

diff --git a/third_party/pybind11/docs/pybind11_vs_boost_python2.png b/third_party/pybind11/docs/pybind11_vs_boost_python2.png new file mode 100644 index 0000000000..9f17272c50 Binary files /dev/null and b/third_party/pybind11/docs/pybind11_vs_boost_python2.png differ

diff --git a/third_party/pybind11/docs/pybind11_vs_boost_python2.svg b/third_party/pybind11/docs/pybind11_vs_boost_python2.svg new file mode 100644 index 0000000000..5ed6530ca1 --- /dev/null +++ b/third_party/pybind11/docs/pybind11_vs_boost_python2.svg @@ -0,0 +1,427 @@ (427 lines of SVG markup omitted: pybind11 vs. Boost.Python benchmark plot)
a/third_party/pybind11/docs/reference.rst b/third_party/pybind11/docs/reference.rst new file mode 100644 index 0000000000..e64a03519d --- /dev/null +++ b/third_party/pybind11/docs/reference.rst @@ -0,0 +1,130 @@ +.. _reference: + +.. warning:: + + Please be advised that the reference documentation discussing pybind11 + internals is currently incomplete. Please refer to the previous sections + and the pybind11 header files for the nitty gritty details. + +Reference +######### + +.. _macros: + +Macros +====== + +.. doxygendefine:: PYBIND11_MODULE + +.. _core_types: + +Convenience classes for arbitrary Python types +============================================== + +Common member functions +----------------------- + +.. doxygenclass:: object_api + :members: + +Without reference counting +-------------------------- + +.. doxygenclass:: handle + :members: + +With reference counting +----------------------- + +.. doxygenclass:: object + :members: + +.. doxygenfunction:: reinterpret_borrow + +.. doxygenfunction:: reinterpret_steal + +Convenience classes for specific Python types +============================================= + +.. doxygenclass:: module_ + :members: + +.. doxygengroup:: pytypes + :members: + +Convenience functions converting to Python types +================================================ + +.. doxygenfunction:: make_tuple(Args&&...) + +.. doxygenfunction:: make_iterator(Iterator, Sentinel, Extra &&...) +.. doxygenfunction:: make_iterator(Type &, Extra&&...) + +.. doxygenfunction:: make_key_iterator(Iterator, Sentinel, Extra &&...) +.. doxygenfunction:: make_key_iterator(Type &, Extra&&...) + +.. doxygenfunction:: make_value_iterator(Iterator, Sentinel, Extra &&...) +.. doxygenfunction:: make_value_iterator(Type &, Extra&&...) + +.. _extras: + +Passing extra arguments to ``def`` or ``class_`` +================================================ + +.. doxygengroup:: annotations + :members: + +Embedding the interpreter +========================= + +.. doxygendefine:: PYBIND11_EMBEDDED_MODULE + +.. doxygenfunction:: initialize_interpreter + +.. doxygenfunction:: finalize_interpreter + +.. doxygenclass:: scoped_interpreter + +Redirecting C++ streams +======================= + +.. doxygenclass:: scoped_ostream_redirect + +.. doxygenclass:: scoped_estream_redirect + +.. doxygenfunction:: add_ostream_redirect + +Python built-in functions +========================= + +.. doxygengroup:: python_builtins + :members: + +Inheritance +=========== + +See :doc:`/classes` and :doc:`/advanced/classes` for more detail. + +.. doxygendefine:: PYBIND11_OVERRIDE + +.. doxygendefine:: PYBIND11_OVERRIDE_PURE + +.. doxygendefine:: PYBIND11_OVERRIDE_NAME + +.. doxygendefine:: PYBIND11_OVERRIDE_PURE_NAME + +.. doxygenfunction:: get_override + +Exceptions +========== + +.. doxygenclass:: error_already_set + :members: + +.. doxygenclass:: builtin_exception + :members: + +Literals +======== + +.. doxygennamespace:: literals diff --git a/third_party/pybind11/docs/release.rst b/third_party/pybind11/docs/release.rst new file mode 100644 index 0000000000..e761cdf7a6 --- /dev/null +++ b/third_party/pybind11/docs/release.rst @@ -0,0 +1,97 @@ +On version numbers +^^^^^^^^^^^^^^^^^^ + +The two version numbers (C++ and Python) must match when combined (checked when +you build the PyPI package), and must be a valid `PEP 440 +`_ version when combined. + +For example: + +.. 
code-block:: C++
+
+    #define PYBIND11_VERSION_MAJOR X
+    #define PYBIND11_VERSION_MINOR Y
+    #define PYBIND11_VERSION_PATCH Z.dev1
+
+For beta, ``PYBIND11_VERSION_PATCH`` should be ``Z.b1``. RCs can be ``Z.rc1``.
+Always include the dot (even though PEP 440 allows it to be dropped). For a
+final release, this must be a simple integer. There is also a hex version of
+the version number just below (an illustrative sketch appears at the end of
+this section).
+
+
+To release a new version of pybind11:
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+If you don't have nox, either use ``pipx run nox`` instead, or install it with
+``pipx install nox`` or ``brew install nox`` (Unix).
+
+- Update the version number
+  - Update ``PYBIND11_VERSION_MAJOR`` etc. in
+    ``include/pybind11/detail/common.h``. PATCH should be a simple integer.
+  - Update the hex version just below, as well.
+  - Update ``pybind11/_version.py`` (to match the above).
+  - Run ``nox -s tests_packaging`` to ensure this was done correctly.
+  - Ensure that all the information in ``setup.cfg`` is up-to-date, like
+    supported Python versions.
+  - Add the release date in ``docs/changelog.rst``.
+  - Check to make sure
+    `needs-changelog `_
+    issues are entered in the changelog (clear the label when done).
+  - ``git add`` and ``git commit``, ``git push``. **Ensure CI passes**. (If it
+    fails due to a known flaky issue, either ignore or restart CI.)
+- Add a release branch if this is a new minor version, or update the existing
+  release branch if it is a patch version
+  - New branch: ``git checkout -b vX.Y``, ``git push -u origin vX.Y``
+  - Update branch: ``git checkout vX.Y``, ``git merge ``, ``git push``
+- Update tags (optional; if you skip this, the GitHub release makes a
+  non-annotated tag for you)
+  - ``git tag -a vX.Y.Z -m 'vX.Y.Z release'``.
+  - ``git push --tags``.
+- Update stable
+  - ``git checkout stable``
+  - ``git merge master``
+  - ``git push``
+- Make a GitHub release (this shows up in the UI, sends new release
+  notifications to users watching releases, and also uploads PyPI packages).
+  (Note: if you do not use an existing tag, this creates a new lightweight tag
+  for you, so you could skip the tagging step above.)
+  - GUI method: Under `releases `_
+    click "Draft a new release" on the far right, fill in the tag name
+    (if you didn't tag above, it will be made here), fill in a release name
+    like "Version X.Y.Z", and copy-and-paste the markdown-formatted (!)
+    changelog into the description (usually
+    ``cat docs/changelog.rst | pandoc -f rst -t gfm``).
+    Check "pre-release" if this is a beta/RC.
+  - CLI method: with ``gh`` installed, run
+    ``gh release create vX.Y.Z -t "Version X.Y.Z"``.
+    If this is a pre-release, add ``-p``.
+
+- Get back to work
+  - Make sure you are on master, not somewhere else: ``git checkout master``.
+  - Update the version macros in ``include/pybind11/detail/common.h`` (set
+    PATCH to ``0.dev1`` and increment MINOR).
+  - Update ``_version.py`` to match.
+  - Run ``nox -s tests_packaging`` to ensure this was done correctly.
+  - Add a spot for in-development updates in ``docs/changelog.rst``.
+  - ``git add``, ``git commit``, ``git push``.
+
+If a version branch is updated, remember to set PATCH to ``1.dev1``.
+
+If you'd like to bump homebrew, run:
+
+.. code-block:: console
+
+    brew bump-formula-pr --url https://github.com/pybind/pybind11/archive/vX.Y.Z.tar.gz
+
+Conda-forge should automatically make a PR in a few hours, and automatically
+merge it if there are no issues.
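+For reference, here is an illustrative sketch of how the version scheme above
+might look in ``include/pybind11/detail/common.h``. The concrete numbers are
+hypothetical, and the hex layout is an assumption modeled on CPython's
+``PY_VERSION_HEX`` (``0xAABBCCDD``) convention:
+
+.. code-block:: cpp
+
+    // Hypothetical values for an in-development 2.10 series:
+    #define PYBIND11_VERSION_MAJOR 2
+    #define PYBIND11_VERSION_MINOR 10
+    #define PYBIND11_VERSION_PATCH 0.dev1
+
+    // Combined, this must form the valid PEP 440 version "2.10.0.dev1",
+    // and pybind11/_version.py must carry the same string.
+
+    // Hex version (assumption: CPython-style 0xAABBCCDD layout, where the
+    // low byte holds the release level and serial; 0xD1 stands in here for
+    // "dev release, serial 1" in this sketch):
+    #define PYBIND11_VERSION_HEX 0x020A00D1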
+
+
+Manual packaging
+^^^^^^^^^^^^^^^^
+
+If you need to manually upload releases, you can download the releases from
+the job artifacts and upload them with twine. You can also make the files
+locally (not recommended in general, as your local directory is more likely
+to be "dirty" and SDists love picking up random unrelated/hidden files);
+this is the procedure:
+
+.. code-block:: bash
+
+    nox -s build
+    twine upload dist/*
+
+This makes SDists and wheels, and the final line uploads them.
diff --git a/third_party/pybind11/docs/requirements.txt b/third_party/pybind11/docs/requirements.txt
new file mode 100644
index 0000000000..e452ed261f
--- /dev/null
+++ b/third_party/pybind11/docs/requirements.txt
@@ -0,0 +1,5 @@
+breathe==4.32.0
+sphinx==4.4.0
+sphinx_rtd_theme==1.0.0
+sphinxcontrib-moderncmakedomain==3.21.4
+sphinxcontrib-svg2pdfconverter==1.2.0
diff --git a/third_party/pybind11/docs/upgrade.rst b/third_party/pybind11/docs/upgrade.rst
new file mode 100644
index 0000000000..6a9db2d08f
--- /dev/null
+++ b/third_party/pybind11/docs/upgrade.rst
@@ -0,0 +1,552 @@
+Upgrade guide
+#############
+
+This is a companion guide to the :doc:`changelog`. While the changelog briefly
+lists all of the new features, improvements and bug fixes, this upgrade guide
+focuses only on the subset that directly impacts your experience when
+upgrading to a new version, and it goes into more detail. This includes
+things like deprecated APIs and their replacements, build system changes,
+general code modernization and other useful information.
+
+.. _upgrade-guide-2.9:
+
+v2.9
+====
+
+* Any usage of the recently added ``py::make_simple_namespace`` should be
+  converted to using ``py::module_::import("types").attr("SimpleNamespace")``
+  instead.
+
+* The use of ``_`` in custom type casters can now be replaced with the more
+  readable ``const_name`` instead. The old ``_`` shortcut has been retained
+  unless it is being used as a macro (like for gettext).
+
+
+.. _upgrade-guide-2.7:
+
+v2.7
+====
+
+*Before* v2.7, ``py::str`` could hold ``PyUnicodeObject`` or
+``PyBytesObject``, and ``py::isinstance<py::str>()`` was ``true`` for both
+``py::str`` and ``py::bytes``. Starting with v2.7, ``py::str`` exclusively
+holds ``PyUnicodeObject``
+(`#2409 <https://github.com/pybind/pybind11/pull/2409>`_), and
+``py::isinstance<py::str>()`` is ``true`` only for ``py::str``. To help in
+the transition of user code, the ``PYBIND11_STR_LEGACY_PERMISSIVE`` macro
+is provided as an escape hatch to go back to the legacy behavior. This macro
+will be removed in future releases. Two types of required fixes are expected
+to be common:
+
+* Accidental use of ``py::str`` instead of ``py::bytes``, masked by the
+  legacy behavior. These are probably very easy to fix, by changing from
+  ``py::str`` to ``py::bytes``.
+
+* Reliance on ``py::isinstance<py::str>(obj)`` being ``true`` for
+  ``py::bytes``. This is likely to be easy to fix in most cases by adding
+  ``|| py::isinstance<py::bytes>(obj)``, but a fix may be more involved,
+  e.g. if ``py::isinstance<T>`` appears in a template. Such situations will
+  require careful review and custom fixes.
+
+
+.. _upgrade-guide-2.6:
+
+v2.6
+====
+
+Usage of the ``PYBIND11_OVERLOAD*`` macros and the ``get_overload`` function
+should be replaced by ``PYBIND11_OVERRIDE*`` and ``get_override``. In the
+future, the old macros may be deprecated and removed.
+
+``py::module`` has been renamed ``py::module_``, but a backward-compatible
+typedef has been included. This change was made to avoid a C++20 language
+change that requires unqualified ``module`` not be placed at the start of a
+logical line. Qualified usage is unaffected, and the typedef will remain
+unless the C++ language rules change again.
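+As a quick illustration of both renames, here is a minimal sketch (the
+``Animal`` class and the ``example`` module are hypothetical stand-ins, not
+part of the original guide):
+
+.. code-block:: cpp
+
+    #include <pybind11/pybind11.h>
+
+    #include <string>
+
+    namespace py = pybind11;
+
+    struct Animal {
+        virtual ~Animal() = default;
+        virtual std::string go(int n_times) = 0;
+    };
+
+    struct PyAnimal : Animal {
+        using Animal::Animal;
+        std::string go(int n_times) override {
+            // v2.6 rename: PYBIND11_OVERLOAD_PURE -> PYBIND11_OVERRIDE_PURE
+            PYBIND11_OVERRIDE_PURE(std::string, Animal, go, n_times);
+        }
+    };
+
+    PYBIND11_MODULE(example, m) {
+        py::class_<Animal, PyAnimal>(m, "Animal")
+            .def(py::init<>())
+            .def("go", &Animal::go);
+
+        // v2.6 rename: py::module -> py::module_ (old name kept as a typedef)
+        py::module_ types = py::module_::import("types");
+        m.attr("SimpleNamespace") = types.attr("SimpleNamespace");
+    }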
+
+The public constructors of ``py::module_`` have been deprecated. Use
+``PYBIND11_MODULE`` or ``module_::create_extension_module`` instead.
+
+An error is now thrown when ``__init__`` is forgotten on subclasses. This was
+incorrect before, but was not checked. Add a call to ``__init__`` if it is
+missing.
+
+A ``py::type_error`` is now thrown when casting to a subclass (like
+``py::bytes`` from ``py::object``) if the conversion is not valid. Make a
+valid conversion instead.
+
+The undocumented ``h.get_type()`` method has been deprecated and replaced by
+``py::type::of(h)``.
+
+Enums now have a ``__str__`` method pre-defined; if you want to override it,
+the simplest fix is to add the new ``py::prepend()`` tag when defining
+``"__str__"``.
+
+If ``__eq__`` is defined but ``__hash__`` is not, ``__hash__`` is now set to
+``None``, as in normal CPython. You should add ``__hash__`` if you intended
+the class to be hashable, possibly using the new ``py::hash`` shortcut.
+
+The constructors for ``py::array`` now always take signed integers for size,
+for consistency. This may lead to compiler warnings on some systems. Cast to
+``py::ssize_t`` instead of ``std::size_t``.
+
+The ``tools/clang`` submodule and ``tools/mkdoc.py`` have been moved to a
+standalone package, `pybind11-mkdoc`_. If you were using those tools, please
+use them via a pip install from the new location.
+
+The ``pybind11`` package on PyPI no longer fills the wheel "headers" slot -
+if you were using the headers from this slot, they are available by
+requesting the ``global`` extra, that is, ``pip install "pybind11[global]"``.
+(Most users will be unaffected, as the ``pybind11/include`` location is
+reported by ``python -m pybind11 --includes`` and ``pybind11.get_include()``
+is still correct and has not changed since 2.5).
+
+.. _pybind11-mkdoc: https://github.com/pybind/pybind11-mkdoc
+
+CMake support
+-------------
+
+The minimum required version of CMake is now 3.4. Several details of the
+CMake support have been deprecated; warnings will be shown if you need to
+change something. The changes are:
+
+* ``PYBIND11_CPP_STANDARD=<platform flag>`` is deprecated; please use
+  ``CMAKE_CXX_STANDARD=<number>`` instead, or any other valid CMake CXX or
+  CUDA standard selection method, like ``target_compile_features``.
+
+* If you do not request a standard, pybind11 targets will compile with the
+  compiler default, but not less than C++11, instead of forcing C++14
+  always. If you depend on the old behavior, please use
+  ``set(CMAKE_CXX_STANDARD 14 CACHE STRING "")`` instead.
+
+* Direct ``pybind11::module`` usage should always be accompanied by at least
+  ``set(CMAKE_CXX_VISIBILITY_PRESET hidden)`` or similar - it used to try to
+  manually force this compiler flag (but not correctly on all compilers or
+  with CUDA).
+
+* ``pybind11_add_module``'s ``SYSTEM`` argument is deprecated and does
+  nothing; linking now behaves like other imported libraries consistently in
+  both config and submodule mode, and behaves like a ``SYSTEM`` library by
+  default.
+
+* If ``PYTHON_EXECUTABLE`` is not set, virtual environments (``venv``,
+  ``virtualenv``, and ``conda``) are prioritized over the standard search
+  (similar to the new FindPython mode).
+
+In addition, the following changes may be of interest:
+
+* ``CMAKE_INTERPROCEDURAL_OPTIMIZATION`` will be respected by
+  ``pybind11_add_module`` if set, instead of linking to ``pybind11::lto`` or
+  ``pybind11::thin_lto``.
+
+* Using ``find_package(Python COMPONENTS Interpreter Development)`` before
+  pybind11 will cause pybind11 to use the new Python mechanisms instead of
+  its own custom search, which is based on a patched version of the classic
+  ``FindPythonInterp`` / ``FindPythonLibs``. In the future, this may become
+  the default. A recent (3.15+ or 3.18.2+) version of CMake is recommended.
+
+
+
+v2.5
+====
+
+The Python package now includes the headers as data in the package itself, as
+well as in the "headers" wheel slot. ``pybind11 --includes`` and
+``pybind11.get_include()`` report the new location, which is always correct
+regardless of how pybind11 was installed, making the old ``user=`` argument
+meaningless. If you are not using the function to get the location already,
+you are encouraged to switch to the package location.
+
+
+v2.2
+====
+
+Deprecation of the ``PYBIND11_PLUGIN`` macro
+--------------------------------------------
+
+``PYBIND11_MODULE`` is now the preferred way to create module entry points.
+The old macro emits a compile-time deprecation warning.
+
+.. code-block:: cpp
+
+    // old
+    PYBIND11_PLUGIN(example) {
+        py::module m("example", "documentation string");
+
+        m.def("add", [](int a, int b) { return a + b; });
+
+        return m.ptr();
+    }
+
+    // new
+    PYBIND11_MODULE(example, m) {
+        m.doc() = "documentation string"; // optional
+
+        m.def("add", [](int a, int b) { return a + b; });
+    }
+
+
+New API for defining custom constructors and pickling functions
+---------------------------------------------------------------
+
+The old placement-new custom constructors have been deprecated. The new
+approach uses ``py::init()`` and factory functions to greatly improve type
+safety.
+
+Placement-new can be called accidentally with an incompatible type (without
+any compiler errors or warnings), or it can initialize the same object
+multiple times if not careful with the Python-side ``__init__`` calls. The
+new-style custom constructors prevent such mistakes. See
+:ref:`custom_constructors` for details.
+
+.. code-block:: cpp
+
+    // old -- deprecated (runtime warning shown only in debug mode)
+    py::class_<Foo>(m, "Foo")
+        .def("__init__", [](Foo &self, ...) {
+            new (&self) Foo(...); // uses placement-new
+        });
+
+    // new
+    py::class_<Foo>(m, "Foo")
+        .def(py::init([](...) { // Note: no `self` argument
+            return new Foo(...); // return by raw pointer
+            // or: return std::make_unique<Foo>(...); // return by holder
+            // or: return Foo(...); // return by value (move constructor)
+        }));
+
+Mirroring the custom constructor changes, ``py::pickle()`` is now the
+preferred way to get and set object state. See :ref:`pickling` for details.
+
+.. code-block:: cpp
+
+    // old -- deprecated (runtime warning shown only in debug mode)
+    py::class_<Foo>(m, "Foo")
+        ...
+        .def("__getstate__", [](const Foo &self) {
+            return py::make_tuple(self.value1(), self.value2(), ...);
+        })
+        .def("__setstate__", [](Foo &self, py::tuple t) {
+            new (&self) Foo(t[0].cast<std::string>(), ...);
+        });
+
+    // new
+    py::class_<Foo>(m, "Foo")
+        ...
+        .def(py::pickle(
+            [](const Foo &self) { // __getstate__
+                return py::make_tuple(self.value1(), self.value2(), ...); // unchanged
+            },
+            [](py::tuple t) { // __setstate__, note: no `self` argument
+                return new Foo(t[0].cast<std::string>(), ...);
+                // or: return std::make_unique<Foo>(...); // return by holder
+                // or: return Foo(...); // return by value (move constructor)
+            }
+        ));
+
+For both the constructors and pickling, warnings are shown at module
+initialization time (on import, not when the functions are called).
+They're only visible when compiled in debug mode. Sample warning:
+
+.. code-block:: none
+
+    pybind11-bound class 'mymodule.Foo' is using an old-style placement-new '__init__'
+    which has been deprecated. See the upgrade guide in pybind11's docs.
+
+
+Stricter enforcement of hidden symbol visibility for pybind11 modules
+---------------------------------------------------------------------
+
+pybind11 now tries to actively enforce hidden symbol visibility for modules.
+If you're using either one of pybind11's :doc:`CMake or Python build systems
+` (the two example repositories) and you haven't been exporting any
+symbols, there's nothing to be concerned about. All the changes have been
+done transparently in the background. If you were building manually or
+relied on specific default visibility, read on.
+
+Setting default symbol visibility to *hidden* has always been recommended
+for pybind11 (see :ref:`faq:symhidden`). On Linux and macOS, hidden symbol
+visibility (in conjunction with the ``strip`` utility) yields much smaller
+module binaries. `CPython's extension docs`_ also recommend hiding symbols
+by default, with the goal of avoiding symbol name clashes between modules.
+Starting with v2.2, pybind11 enforces this more strictly: (1) by declaring
+all symbols inside the ``pybind11`` namespace as hidden and (2) by including
+the ``-fvisibility=hidden`` flag on Linux and macOS (only for extension
+modules, not for embedding the interpreter).
+
+.. _CPython's extension docs: https://docs.python.org/3/extending/extending.html#providing-a-c-api-for-an-extension-module
+
+The namespace-scope hidden visibility is done automatically in pybind11's
+headers, and it's generally transparent to users. It ensures that:
+
+* Modules compiled with different pybind11 versions don't clash with each
+  other.
+
+* Some new features, like ``py::module_local`` bindings, can work as
+  intended.
+
+The ``-fvisibility=hidden`` flag applies the same visibility to user
+bindings outside of the ``pybind11`` namespace. It is now set automatically
+by pybind11's CMake and Python build systems, but this needs to be done
+manually by users of other build systems. Adding this flag:
+
+* Minimizes the chances of symbol conflicts between modules. E.g. if two
+  unrelated modules were statically linked to different (ABI-incompatible)
+  versions of the same third-party library, a symbol clash would be likely
+  (and would end with unpredictable results).
+
+* Produces smaller binaries on Linux and macOS, as pointed out previously.
+
+Within pybind11's CMake build system, ``pybind11_add_module`` has always been
+setting the ``-fvisibility=hidden`` flag in release mode. From now on, it is
+applied unconditionally, even in debug mode, and it can no longer be opted
+out of with the ``NO_EXTRAS`` option. The ``pybind11::module`` target now
+also adds this flag to its interface. The ``pybind11::embed`` target is
+unchanged.
+
+The most significant change here is for the ``pybind11::module`` target. If
+you were previously relying on default visibility, i.e. if your Python
+module was doubling as a shared library with dependents, you'll need to
+either export symbols manually (recommended for cross-platform libraries) or
+factor out the shared library (and have the Python module link to it like
+the other dependents). As a temporary workaround, you can also restore
+default visibility using the CMake code below, but this is not recommended
+in the long run:
+.. code-block:: cmake
+
+    target_link_libraries(mymodule PRIVATE pybind11::module)
+
+    add_library(restore_default_visibility INTERFACE)
+    target_compile_options(restore_default_visibility INTERFACE -fvisibility=default)
+    target_link_libraries(mymodule PRIVATE restore_default_visibility)
+
+
+Local STL container bindings
+----------------------------
+
+Previous pybind11 versions could only bind types globally -- all pybind11
+modules, even unrelated ones, would have access to the same exported types.
+However, this would also result in a conflict if two modules exported the
+same C++ type, which is especially problematic for very common types, e.g.
+``std::vector<int>``. :ref:`module_local` bindings were added to resolve
+this (see that section for a complete usage guide).
+
+``py::class_`` still defaults to global bindings (because these types are
+usually unique across modules), however in order to avoid clashes of opaque
+types, ``py::bind_vector`` and ``py::bind_map`` will now bind STL containers
+as ``py::module_local`` if their elements are: builtins (``int``, ``float``,
+etc.), not bound using ``py::class_``, or bound as ``py::module_local``. For
+example, this change allows multiple modules to bind ``std::vector<int>``
+without causing conflicts. See :ref:`stl_bind` for more details.
+
+When upgrading to this version, if you have multiple modules which depend on
+a single global binding of an STL container, note that all modules can still
+accept foreign ``py::module_local`` types in the direction of Python-to-C++.
+The locality only affects the C++-to-Python direction. If this is needed in
+multiple modules, you'll need to either:
+
+* Add a copy of the same STL binding to all of the modules which need it.
+
+* Restore the global status of that single binding by marking it
+  ``py::module_local(false)``.
+
+The latter is an easy workaround, but in the long run it would be best to
+localize all common type bindings in order to avoid conflicts with
+third-party modules.
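+Here is a short, illustrative sketch of the new default and of the opt-out
+(the ``example`` module and the bound names are hypothetical, not part of
+the original guide):
+
+.. code-block:: cpp
+
+    #include <pybind11/pybind11.h>
+    #include <pybind11/stl_bind.h>
+
+    #include <vector>
+
+    namespace py = pybind11;
+
+    PYBIND11_MAKE_OPAQUE(std::vector<int>);
+
+    PYBIND11_MODULE(example, m) {
+        // Since v2.2, this binding is implicitly py::module_local() because
+        // the element type is a builtin, so other modules can bind
+        // std::vector<int> as well without clashing.
+        py::bind_vector<std::vector<int>>(m, "VectorInt");
+
+        // To restore a single shared, global binding instead:
+        // py::bind_vector<std::vector<int>>(m, "VectorInt", py::module_local(false));
+    }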
+
+
+Negative strides for Python buffer objects and numpy arrays
+-----------------------------------------------------------
+
+Support for negative strides required changing the integer type from
+unsigned to signed in the interfaces of ``py::buffer_info`` and
+``py::array``. If you have compiler warnings enabled, you may notice some
+new conversion warnings after upgrading. These can be resolved using
+``static_cast``.
+
+
+Deprecation of some ``py::object`` APIs
+---------------------------------------
+
+To compare ``py::object`` instances by pointer, you should now use
+``obj1.is(obj2)``, which is equivalent to ``obj1 is obj2`` in Python.
+Previously, pybind11 used ``operator==`` for this (``obj1 == obj2``), but
+that could be confusing and is now deprecated (so that it can eventually
+be replaced with proper rich object comparison in a future release).
+
+For classes which inherit from ``py::object``, ``borrowed`` and ``stolen``
+were previously available as protected constructor tags. Now the types
+should be used directly instead: ``borrowed_t{}`` and ``stolen_t{}``
+(`#771 <https://github.com/pybind/pybind11/pull/771>`_).
+
+
+Stricter compile-time error checking
+------------------------------------
+
+Some error checks have been moved from run time to compile time. Notably,
+automatic conversion of ``std::shared_ptr<T>`` is not possible when ``T`` is
+not directly registered with ``py::class_`` (e.g. ``std::shared_ptr<int>``
+or ``std::shared_ptr<std::vector<T>>`` are not automatically convertible).
+Attempting to bind a function with such arguments now results in a
+compile-time error instead of waiting to fail at run time.
+
+``py::init<...>()`` constructor definitions are also stricter and now
+prevent bindings which could cause unexpected behavior:
+
+.. code-block:: cpp
+
+    struct Example {
+        Example(int &);
+    };
+
+    py::class_<Example>(m, "Example")
+        .def(py::init<int &>()); // OK, exact match
+    // .def(py::init<int>()); // compile-time error, mismatch
+
+A non-``const`` lvalue reference is not allowed to bind to an rvalue.
+However, note that a constructor taking ``const T &`` can still be
+registered using ``py::init<const T &>()`` because a ``const`` lvalue
+reference can bind to an rvalue.
+
+v2.1
+====
+
+Minimum compiler versions are enforced at compile time
+------------------------------------------------------
+
+The minimums also apply to v2.0, but the check is now explicit and a
+compile-time error is raised if the compiler does not meet the requirements:
+
+* GCC >= 4.8
+* clang >= 3.3 (appleclang >= 5.0)
+* MSVC >= 2015u3
+* Intel C++ >= 15.0
+
+
+The ``py::metaclass`` attribute is not required for static properties
+---------------------------------------------------------------------
+
+Binding classes with static properties is now possible by default. The
+zero-parameter version of ``py::metaclass()`` is deprecated. However, a new
+one-parameter ``py::metaclass(python_type)`` version was added for rare
+cases when a custom metaclass is needed to override pybind11's default.
+
+.. code-block:: cpp
+
+    // old -- emits a deprecation warning
+    py::class_<Foo>(m, "Foo", py::metaclass())
+        .def_property_readonly_static("foo", ...);
+
+    // new -- static properties work without the attribute
+    py::class_<Foo>(m, "Foo")
+        .def_property_readonly_static("foo", ...);
+
+    // new -- advanced feature, override pybind11's default metaclass
+    py::class_<Bar>(m, "Bar", py::metaclass(custom_python_type))
+        ...
+
+
+v2.0
+====
+
+Breaking changes in ``py::class_``
+----------------------------------
+
+These changes were necessary to make type definitions in pybind11
+future-proof, to support PyPy via its ``cpyext`` mechanism (`#527
+<https://github.com/pybind/pybind11/issues/527>`_), and to improve
+efficiency (`rev. 86d825 `_).
+
+1. Declarations of types that provide access via the buffer protocol must
+   now include the ``py::buffer_protocol()`` annotation as an argument to
+   the ``py::class_`` constructor.
+
+   .. code-block:: cpp
+
+       py::class_<Matrix>("Matrix", py::buffer_protocol())
+           .def(py::init<...>())
+           .def_buffer(...);
+
+2. Classes which include static properties (e.g. ``def_readwrite_static()``)
+   must now include the ``py::metaclass()`` attribute. Note: this
+   requirement has since been removed in v2.1. If you're upgrading from 1.x,
+   it's recommended to skip directly to v2.1 or newer.
+
+3. This version of pybind11 uses a redesigned mechanism for instantiating
+   trampoline classes that are used to override virtual methods from within
+   Python. This led to the following user-visible syntax change:
+
+   .. code-block:: cpp
+
+       // old v1.x syntax
+       py::class_<MyClass>("MyClass")
+           .alias<MyClassAlias>()
+           ...
+
+       // new v2.x syntax
+       py::class_<MyClass, MyClassAlias>("MyClass")
+           ...
+
+   Importantly, both the original and the trampoline class are now specified
+   as arguments to the ``py::class_`` template, and the ``alias<..>()`` call
+   is gone. The new scheme has zero overhead in cases when Python doesn't
+   override any functions of the underlying C++ class.
+   `rev. 86d825 `_.
+
+   The class type must be the first template argument given to
+   ``py::class_``, while the trampoline can be mixed in arbitrary order with
+   other arguments (see the following section).
+
+
+Deprecation of the ``py::base<T>()`` attribute
+----------------------------------------------
+
+``py::base<T>()`` was deprecated in favor of specifying ``T`` as a template
+argument to ``py::class_``. This new syntax also supports multiple
+inheritance. Note that, while the type being exported must be the first
+argument in the ``py::class_`` template, the order of the following types
+(bases, holder and/or trampoline) is not important.
+
+.. code-block:: cpp
+
+    // old v1.x
+    py::class_<Derived>("Derived", py::base<Base>());
+
+    // new v2.x
+    py::class_<Derived, Base>("Derived");
+
+    // new -- multiple inheritance
+    py::class_<Derived, Base1, Base2>("Derived");
+
+    // new -- apart from `Derived` the argument order can be arbitrary
+    py::class_<Derived, Base2, Base1>("Derived");
+
+
+Out-of-the-box support for ``std::shared_ptr``
+----------------------------------------------
+
+The relevant type caster is now built in, so it's no longer necessary to
+include a declaration of the form:
+
+.. code-block:: cpp
+
+    PYBIND11_DECLARE_HOLDER_TYPE(T, std::shared_ptr<T>)
+
+Continuing to do so won't cause an error or even a deprecation warning,
+but it's completely redundant.
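+With the built-in caster, a ``std::shared_ptr`` holder simply works out of
+the box; here is a small sketch (``Pet`` and the ``example`` module are
+hypothetical, not part of the original guide):
+
+.. code-block:: cpp
+
+    #include <pybind11/pybind11.h>
+
+    #include <memory>
+    #include <string>
+
+    namespace py = pybind11;
+
+    struct Pet {
+        explicit Pet(std::string name) : name(std::move(name)) {}
+        std::string name;
+    };
+
+    PYBIND11_MODULE(example, m) {
+        // No PYBIND11_DECLARE_HOLDER_TYPE(T, std::shared_ptr<T>) required:
+        py::class_<Pet, std::shared_ptr<Pet>>(m, "Pet")
+            .def(py::init<std::string>())
+            .def_readwrite("name", &Pet::name);
+
+        m.def("make_pet",
+              [](const std::string &name) { return std::make_shared<Pet>(name); });
+    }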
+
+
+Deprecation of a few ``py::object`` APIs
+----------------------------------------
+
+All of the old-style calls emit deprecation warnings.
+
++---------------------------------------+---------------------------------------------+
+| Old syntax                            | New syntax                                  |
++=======================================+=============================================+
+| ``obj.call(args...)``                 | ``obj(args...)``                            |
++---------------------------------------+---------------------------------------------+
+| ``obj.str()``                         | ``py::str(obj)``                            |
++---------------------------------------+---------------------------------------------+
+| ``auto l = py::list(obj); l.check()`` | ``py::isinstance<py::list>(obj)``           |
++---------------------------------------+---------------------------------------------+
+| ``py::object(ptr, true)``             | ``py::reinterpret_borrow<py::object>(ptr)`` |
++---------------------------------------+---------------------------------------------+
+| ``py::object(ptr, false)``            | ``py::reinterpret_steal<py::object>(ptr)``  |
++---------------------------------------+---------------------------------------------+
+| ``if (obj.attr("foo"))``              | ``if (py::hasattr(obj, "foo"))``            |
++---------------------------------------+---------------------------------------------+
+| ``if (obj["bar"])``                   | ``if (obj.contains("bar"))``                |
++---------------------------------------+---------------------------------------------+
diff --git a/third_party/pybind11/include/pybind11/attr.h b/third_party/pybind11/include/pybind11/attr.h
new file mode 100644
index 0000000000..14600e9bb0
--- /dev/null
+++ b/third_party/pybind11/include/pybind11/attr.h
@@ -0,0 +1,676 @@
+/*
+    pybind11/attr.h: Infrastructure for processing custom
+    type and function attributes
+
+    Copyright (c) 2016 Wenzel Jakob
+
+    All rights reserved. Use of this source code is governed by a
+    BSD-style license that can be found in the LICENSE file.
+*/ + +#pragma once + +#include "detail/common.h" +#include "cast.h" + +#include + +PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE) + +/// \addtogroup annotations +/// @{ + +/// Annotation for methods +struct is_method { + handle class_; + explicit is_method(const handle &c) : class_(c) {} +}; + +/// Annotation for operators +struct is_operator {}; + +/// Annotation for classes that cannot be subclassed +struct is_final {}; + +/// Annotation for parent scope +struct scope { + handle value; + explicit scope(const handle &s) : value(s) {} +}; + +/// Annotation for documentation +struct doc { + const char *value; + explicit doc(const char *value) : value(value) {} +}; + +/// Annotation for function names +struct name { + const char *value; + explicit name(const char *value) : value(value) {} +}; + +/// Annotation indicating that a function is an overload associated with a given "sibling" +struct sibling { + handle value; + explicit sibling(const handle &value) : value(value.ptr()) {} +}; + +/// Annotation indicating that a class derives from another given type +template +struct base { + + PYBIND11_DEPRECATED( + "base() was deprecated in favor of specifying 'T' as a template argument to class_") + base() = default; +}; + +/// Keep patient alive while nurse lives +template +struct keep_alive {}; + +/// Annotation indicating that a class is involved in a multiple inheritance relationship +struct multiple_inheritance {}; + +/// Annotation which enables dynamic attributes, i.e. adds `__dict__` to a class +struct dynamic_attr {}; + +/// Annotation which enables the buffer protocol for a type +struct buffer_protocol {}; + +/// Annotation which requests that a special metaclass is created for a type +struct metaclass { + handle value; + + PYBIND11_DEPRECATED("py::metaclass() is no longer required. It's turned on by default now.") + metaclass() = default; + + /// Override pybind11's default metaclass + explicit metaclass(handle value) : value(value) {} +}; + +/// Specifies a custom callback with signature `void (PyHeapTypeObject*)` that +/// may be used to customize the Python type. +/// +/// The callback is invoked immediately before `PyType_Ready`. +/// +/// Note: This is an advanced interface, and uses of it may require changes to +/// work with later versions of pybind11. You may wish to consult the +/// implementation of `make_new_python_type` in `detail/classes.h` to understand +/// the context in which the callback will be run. +struct custom_type_setup { + using callback = std::function; + + explicit custom_type_setup(callback value) : value(std::move(value)) {} + + callback value; +}; + +/// Annotation that marks a class as local to the module: +struct module_local { + const bool value; + constexpr explicit module_local(bool v = true) : value(v) {} +}; + +/// Annotation to mark enums as an arithmetic type +struct arithmetic {}; + +/// Mark a function for addition at the beginning of the existing overload chain instead of the end +struct prepend {}; + +/** \rst + A call policy which places one or more guard variables (``Ts...``) around the function call. + + For example, this definition: + + .. code-block:: cpp + + m.def("foo", foo, py::call_guard()); + + is equivalent to the following pseudocode: + + .. code-block:: cpp + + m.def("foo", [](args...) 
{ + T scope_guard; + return foo(args...); // forwarded arguments + }); + \endrst */ +template +struct call_guard; + +template <> +struct call_guard<> { + using type = detail::void_type; +}; + +template +struct call_guard { + static_assert(std::is_default_constructible::value, + "The guard type must be default constructible"); + + using type = T; +}; + +template +struct call_guard { + struct type { + T guard{}; // Compose multiple guard types with left-to-right default-constructor order + typename call_guard::type next{}; + }; +}; + +/// @} annotations + +PYBIND11_NAMESPACE_BEGIN(detail) +/* Forward declarations */ +enum op_id : int; +enum op_type : int; +struct undefined_t; +template +struct op_; +void keep_alive_impl(size_t Nurse, size_t Patient, function_call &call, handle ret); + +/// Internal data structure which holds metadata about a keyword argument +struct argument_record { + const char *name; ///< Argument name + const char *descr; ///< Human-readable version of the argument value + handle value; ///< Associated Python object + bool convert : 1; ///< True if the argument is allowed to convert when loading + bool none : 1; ///< True if None is allowed when loading + + argument_record(const char *name, const char *descr, handle value, bool convert, bool none) + : name(name), descr(descr), value(value), convert(convert), none(none) {} +}; + +/// Internal data structure which holds metadata about a bound function (signature, overloads, +/// etc.) +struct function_record { + function_record() + : is_constructor(false), is_new_style_constructor(false), is_stateless(false), + is_operator(false), is_method(false), has_args(false), has_kwargs(false), + prepend(false) {} + + /// Function name + char *name = nullptr; /* why no C++ strings? They generate heavier code.. */ + + // User-specified documentation string + char *doc = nullptr; + + /// Human-readable version of the function signature + char *signature = nullptr; + + /// List of registered keyword arguments + std::vector args; + + /// Pointer to lambda function which converts arguments and performs the actual call + handle (*impl)(function_call &) = nullptr; + + /// Storage for the wrapped function pointer and captured data, if any + void *data[3] = {}; + + /// Pointer to custom destructor for 'data' (if needed) + void (*free_data)(function_record *ptr) = nullptr; + + /// Return value policy associated with this function + return_value_policy policy = return_value_policy::automatic; + + /// True if name == '__init__' + bool is_constructor : 1; + + /// True if this is a new-style `__init__` defined in `detail/init.h` + bool is_new_style_constructor : 1; + + /// True if this is a stateless function pointer + bool is_stateless : 1; + + /// True if this is an operator (__add__), etc. + bool is_operator : 1; + + /// True if this is a method + bool is_method : 1; + + /// True if the function has a '*args' argument + bool has_args : 1; + + /// True if the function has a '**kwargs' argument + bool has_kwargs : 1; + + /// True if this function is to be inserted at the beginning of the overload resolution chain + bool prepend : 1; + + /// Number of arguments (including py::args and/or py::kwargs, if present) + std::uint16_t nargs; + + /// Number of leading positional arguments, which are terminated by a py::args or py::kwargs + /// argument or by a py::kw_only annotation. 
+ std::uint16_t nargs_pos = 0; + + /// Number of leading arguments (counted in `nargs`) that are positional-only + std::uint16_t nargs_pos_only = 0; + + /// Python method object + PyMethodDef *def = nullptr; + + /// Python handle to the parent scope (a class or a module) + handle scope; + + /// Python handle to the sibling function representing an overload chain + handle sibling; + + /// Pointer to next overload + function_record *next = nullptr; +}; + +/// Special data structure which (temporarily) holds metadata about a bound class +struct type_record { + PYBIND11_NOINLINE type_record() + : multiple_inheritance(false), dynamic_attr(false), buffer_protocol(false), + default_holder(true), module_local(false), is_final(false) {} + + /// Handle to the parent scope + handle scope; + + /// Name of the class + const char *name = nullptr; + + // Pointer to RTTI type_info data structure + const std::type_info *type = nullptr; + + /// How large is the underlying C++ type? + size_t type_size = 0; + + /// What is the alignment of the underlying C++ type? + size_t type_align = 0; + + /// How large is the type's holder? + size_t holder_size = 0; + + /// The global operator new can be overridden with a class-specific variant + void *(*operator_new)(size_t) = nullptr; + + /// Function pointer to class_<..>::init_instance + void (*init_instance)(instance *, const void *) = nullptr; + + /// Function pointer to class_<..>::dealloc + void (*dealloc)(detail::value_and_holder &) = nullptr; + + /// List of base classes of the newly created type + list bases; + + /// Optional docstring + const char *doc = nullptr; + + /// Custom metaclass (optional) + handle metaclass; + + /// Custom type setup. + custom_type_setup::callback custom_type_setup_callback; + + /// Multiple inheritance marker + bool multiple_inheritance : 1; + + /// Does the class manage a __dict__? + bool dynamic_attr : 1; + + /// Does the class implement the buffer protocol? + bool buffer_protocol : 1; + + /// Is the default (unique_ptr) holder type used? + bool default_holder : 1; + + /// Is the class definition local to the module shared object? + bool module_local : 1; + + /// Is the class inheritable from python classes? + bool is_final : 1; + + PYBIND11_NOINLINE void add_base(const std::type_info &base, void *(*caster)(void *) ) { + auto *base_info = detail::get_type_info(base, false); + if (!base_info) { + std::string tname(base.name()); + detail::clean_type_id(tname); + pybind11_fail("generic_type: type \"" + std::string(name) + + "\" referenced unknown base type \"" + tname + "\""); + } + + if (default_holder != base_info->default_holder) { + std::string tname(base.name()); + detail::clean_type_id(tname); + pybind11_fail("generic_type: type \"" + std::string(name) + "\" " + + (default_holder ? "does not have" : "has") + + " a non-default holder type while its base \"" + tname + "\" " + + (base_info->default_holder ? "does not" : "does")); + } + + bases.append((PyObject *) base_info->type); + + if (base_info->type->tp_dictoffset != 0) { + dynamic_attr = true; + } + + if (caster) { + base_info->implicit_casts.emplace_back(type, caster); + } + } +}; + +inline function_call::function_call(const function_record &f, handle p) : func(f), parent(p) { + args.reserve(f.nargs); + args_convert.reserve(f.nargs); +} + +/// Tag for a new-style `__init__` defined in `detail/init.h` +struct is_new_style_constructor {}; + +/** + * Partial template specializations to process custom attributes provided to + * cpp_function_ and class_. 
These are either used to initialize the respective + * fields in the type_record and function_record data structures or executed at + * runtime to deal with custom call policies (e.g. keep_alive). + */ +template +struct process_attribute; + +template +struct process_attribute_default { + /// Default implementation: do nothing + static void init(const T &, function_record *) {} + static void init(const T &, type_record *) {} + static void precall(function_call &) {} + static void postcall(function_call &, handle) {} +}; + +/// Process an attribute specifying the function's name +template <> +struct process_attribute : process_attribute_default { + static void init(const name &n, function_record *r) { r->name = const_cast(n.value); } +}; + +/// Process an attribute specifying the function's docstring +template <> +struct process_attribute : process_attribute_default { + static void init(const doc &n, function_record *r) { r->doc = const_cast(n.value); } +}; + +/// Process an attribute specifying the function's docstring (provided as a C-style string) +template <> +struct process_attribute : process_attribute_default { + static void init(const char *d, function_record *r) { r->doc = const_cast(d); } + static void init(const char *d, type_record *r) { r->doc = const_cast(d); } +}; +template <> +struct process_attribute : process_attribute {}; + +/// Process an attribute indicating the function's return value policy +template <> +struct process_attribute : process_attribute_default { + static void init(const return_value_policy &p, function_record *r) { r->policy = p; } +}; + +/// Process an attribute which indicates that this is an overloaded function associated with a +/// given sibling +template <> +struct process_attribute : process_attribute_default { + static void init(const sibling &s, function_record *r) { r->sibling = s.value; } +}; + +/// Process an attribute which indicates that this function is a method +template <> +struct process_attribute : process_attribute_default { + static void init(const is_method &s, function_record *r) { + r->is_method = true; + r->scope = s.class_; + } +}; + +/// Process an attribute which indicates the parent scope of a method +template <> +struct process_attribute : process_attribute_default { + static void init(const scope &s, function_record *r) { r->scope = s.value; } +}; + +/// Process an attribute which indicates that this function is an operator +template <> +struct process_attribute : process_attribute_default { + static void init(const is_operator &, function_record *r) { r->is_operator = true; } +}; + +template <> +struct process_attribute + : process_attribute_default { + static void init(const is_new_style_constructor &, function_record *r) { + r->is_new_style_constructor = true; + } +}; + +inline void check_kw_only_arg(const arg &a, function_record *r) { + if (r->args.size() > r->nargs_pos && (!a.name || a.name[0] == '\0')) { + pybind11_fail("arg(): cannot specify an unnamed argument after a kw_only() annotation or " + "args() argument"); + } +} + +inline void append_self_arg_if_needed(function_record *r) { + if (r->is_method && r->args.empty()) { + r->args.emplace_back("self", nullptr, handle(), /*convert=*/true, /*none=*/false); + } +} + +/// Process a keyword argument attribute (*without* a default value) +template <> +struct process_attribute : process_attribute_default { + static void init(const arg &a, function_record *r) { + append_self_arg_if_needed(r); + r->args.emplace_back(a.name, nullptr, handle(), !a.flag_noconvert, 
a.flag_none); + + check_kw_only_arg(a, r); + } +}; + +/// Process a keyword argument attribute (*with* a default value) +template <> +struct process_attribute : process_attribute_default { + static void init(const arg_v &a, function_record *r) { + if (r->is_method && r->args.empty()) { + r->args.emplace_back( + "self", /*descr=*/nullptr, /*parent=*/handle(), /*convert=*/true, /*none=*/false); + } + + if (!a.value) { +#if defined(PYBIND11_DETAILED_ERROR_MESSAGES) + std::string descr("'"); + if (a.name) { + descr += std::string(a.name) + ": "; + } + descr += a.type + "'"; + if (r->is_method) { + if (r->name) { + descr += " in method '" + (std::string) str(r->scope) + "." + + (std::string) r->name + "'"; + } else { + descr += " in method of '" + (std::string) str(r->scope) + "'"; + } + } else if (r->name) { + descr += " in function '" + (std::string) r->name + "'"; + } + pybind11_fail("arg(): could not convert default argument " + descr + + " into a Python object (type not registered yet?)"); +#else + pybind11_fail("arg(): could not convert default argument " + "into a Python object (type not registered yet?). " + "#define PYBIND11_DETAILED_ERROR_MESSAGES or compile in debug mode for " + "more information."); +#endif + } + r->args.emplace_back(a.name, a.descr, a.value.inc_ref(), !a.flag_noconvert, a.flag_none); + + check_kw_only_arg(a, r); + } +}; + +/// Process a keyword-only-arguments-follow pseudo argument +template <> +struct process_attribute : process_attribute_default { + static void init(const kw_only &, function_record *r) { + append_self_arg_if_needed(r); + if (r->has_args && r->nargs_pos != static_cast(r->args.size())) { + pybind11_fail("Mismatched args() and kw_only(): they must occur at the same relative " + "argument location (or omit kw_only() entirely)"); + } + r->nargs_pos = static_cast(r->args.size()); + } +}; + +/// Process a positional-only-argument maker +template <> +struct process_attribute : process_attribute_default { + static void init(const pos_only &, function_record *r) { + append_self_arg_if_needed(r); + r->nargs_pos_only = static_cast(r->args.size()); + if (r->nargs_pos_only > r->nargs_pos) { + pybind11_fail("pos_only(): cannot follow a py::args() argument"); + } + // It also can't follow a kw_only, but a static_assert in pybind11.h checks that + } +}; + +/// Process a parent class attribute. 
Single inheritance only (class_ itself already guarantees +/// that) +template +struct process_attribute::value>> + : process_attribute_default { + static void init(const handle &h, type_record *r) { r->bases.append(h); } +}; + +/// Process a parent class attribute (deprecated, does not support multiple inheritance) +template +struct process_attribute> : process_attribute_default> { + static void init(const base &, type_record *r) { r->add_base(typeid(T), nullptr); } +}; + +/// Process a multiple inheritance attribute +template <> +struct process_attribute : process_attribute_default { + static void init(const multiple_inheritance &, type_record *r) { + r->multiple_inheritance = true; + } +}; + +template <> +struct process_attribute : process_attribute_default { + static void init(const dynamic_attr &, type_record *r) { r->dynamic_attr = true; } +}; + +template <> +struct process_attribute { + static void init(const custom_type_setup &value, type_record *r) { + r->custom_type_setup_callback = value.value; + } +}; + +template <> +struct process_attribute : process_attribute_default { + static void init(const is_final &, type_record *r) { r->is_final = true; } +}; + +template <> +struct process_attribute : process_attribute_default { + static void init(const buffer_protocol &, type_record *r) { r->buffer_protocol = true; } +}; + +template <> +struct process_attribute : process_attribute_default { + static void init(const metaclass &m, type_record *r) { r->metaclass = m.value; } +}; + +template <> +struct process_attribute : process_attribute_default { + static void init(const module_local &l, type_record *r) { r->module_local = l.value; } +}; + +/// Process a 'prepend' attribute, putting this at the beginning of the overload chain +template <> +struct process_attribute : process_attribute_default { + static void init(const prepend &, function_record *r) { r->prepend = true; } +}; + +/// Process an 'arithmetic' attribute for enums (does nothing here) +template <> +struct process_attribute : process_attribute_default {}; + +template +struct process_attribute> : process_attribute_default> {}; + +/** + * Process a keep_alive call policy -- invokes keep_alive_impl during the + * pre-call handler if both Nurse, Patient != 0 and use the post-call handler + * otherwise + */ +template +struct process_attribute> + : public process_attribute_default> { + template = 0> + static void precall(function_call &call) { + keep_alive_impl(Nurse, Patient, call, handle()); + } + template = 0> + static void postcall(function_call &, handle) {} + template = 0> + static void precall(function_call &) {} + template = 0> + static void postcall(function_call &call, handle ret) { + keep_alive_impl(Nurse, Patient, call, ret); + } +}; + +/// Recursively iterate over variadic template arguments +template +struct process_attributes { + static void init(const Args &...args, function_record *r) { + PYBIND11_WORKAROUND_INCORRECT_MSVC_C4100(r); + PYBIND11_WORKAROUND_INCORRECT_GCC_UNUSED_BUT_SET_PARAMETER(r); + using expander = int[]; + (void) expander{ + 0, ((void) process_attribute::type>::init(args, r), 0)...}; + } + static void init(const Args &...args, type_record *r) { + PYBIND11_WORKAROUND_INCORRECT_MSVC_C4100(r); + PYBIND11_WORKAROUND_INCORRECT_GCC_UNUSED_BUT_SET_PARAMETER(r); + using expander = int[]; + (void) expander{0, + (process_attribute::type>::init(args, r), 0)...}; + } + static void precall(function_call &call) { + PYBIND11_WORKAROUND_INCORRECT_MSVC_C4100(call); + using expander = int[]; + (void) expander{0, + 
(process_attribute::type>::precall(call), 0)...}; + } + static void postcall(function_call &call, handle fn_ret) { + PYBIND11_WORKAROUND_INCORRECT_MSVC_C4100(call, fn_ret); + PYBIND11_WORKAROUND_INCORRECT_GCC_UNUSED_BUT_SET_PARAMETER(fn_ret); + using expander = int[]; + (void) expander{ + 0, (process_attribute::type>::postcall(call, fn_ret), 0)...}; + } +}; + +template +using is_call_guard = is_instantiation; + +/// Extract the ``type`` from the first `call_guard` in `Extras...` (or `void_type` if none found) +template +using extract_guard_t = typename exactly_one_t, Extra...>::type; + +/// Check the number of named arguments at compile time +template ::value...), + size_t self = constexpr_sum(std::is_same::value...)> +constexpr bool expected_num_args(size_t nargs, bool has_args, bool has_kwargs) { + PYBIND11_WORKAROUND_INCORRECT_MSVC_C4100(nargs, has_args, has_kwargs); + return named == 0 || (self + named + size_t(has_args) + size_t(has_kwargs)) == nargs; +} + +PYBIND11_NAMESPACE_END(detail) +PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE) diff --git a/third_party/pybind11/include/pybind11/buffer_info.h b/third_party/pybind11/include/pybind11/buffer_info.h new file mode 100644 index 0000000000..06120d5563 --- /dev/null +++ b/third_party/pybind11/include/pybind11/buffer_info.h @@ -0,0 +1,193 @@ +/* + pybind11/buffer_info.h: Python buffer object interface + + Copyright (c) 2016 Wenzel Jakob + + All rights reserved. Use of this source code is governed by a + BSD-style license that can be found in the LICENSE file. +*/ + +#pragma once + +#include "detail/common.h" + +PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE) + +PYBIND11_NAMESPACE_BEGIN(detail) + +// Default, C-style strides +inline std::vector c_strides(const std::vector &shape, ssize_t itemsize) { + auto ndim = shape.size(); + std::vector strides(ndim, itemsize); + if (ndim > 0) { + for (size_t i = ndim - 1; i > 0; --i) { + strides[i - 1] = strides[i] * shape[i]; + } + } + return strides; +} + +// F-style strides; default when constructing an array_t with `ExtraFlags & f_style` +inline std::vector f_strides(const std::vector &shape, ssize_t itemsize) { + auto ndim = shape.size(); + std::vector strides(ndim, itemsize); + for (size_t i = 1; i < ndim; ++i) { + strides[i] = strides[i - 1] * shape[i - 1]; + } + return strides; +} + +PYBIND11_NAMESPACE_END(detail) + +/// Information record describing a Python buffer object +struct buffer_info { + void *ptr = nullptr; // Pointer to the underlying storage + ssize_t itemsize = 0; // Size of individual items in bytes + ssize_t size = 0; // Total number of entries + std::string format; // For homogeneous buffers, this should be set to + // format_descriptor::format() + ssize_t ndim = 0; // Number of dimensions + std::vector shape; // Shape of the tensor (1 entry per dimension) + std::vector strides; // Number of bytes between adjacent entries + // (for each per dimension) + bool readonly = false; // flag to indicate if the underlying storage may be written to + + buffer_info() = default; + + buffer_info(void *ptr, + ssize_t itemsize, + const std::string &format, + ssize_t ndim, + detail::any_container shape_in, + detail::any_container strides_in, + bool readonly = false) + : ptr(ptr), itemsize(itemsize), size(1), format(format), ndim(ndim), + shape(std::move(shape_in)), strides(std::move(strides_in)), readonly(readonly) { + if (ndim != (ssize_t) shape.size() || ndim != (ssize_t) strides.size()) { + pybind11_fail("buffer_info: ndim doesn't match shape and/or strides length"); + } + for (size_t i = 0; i < 
(size_t) ndim; ++i) { + size *= shape[i]; + } + } + + template + buffer_info(T *ptr, + detail::any_container shape_in, + detail::any_container strides_in, + bool readonly = false) + : buffer_info(private_ctr_tag(), + ptr, + sizeof(T), + format_descriptor::format(), + static_cast(shape_in->size()), + std::move(shape_in), + std::move(strides_in), + readonly) {} + + buffer_info(void *ptr, + ssize_t itemsize, + const std::string &format, + ssize_t size, + bool readonly = false) + : buffer_info(ptr, itemsize, format, 1, {size}, {itemsize}, readonly) {} + + template + buffer_info(T *ptr, ssize_t size, bool readonly = false) + : buffer_info(ptr, sizeof(T), format_descriptor::format(), size, readonly) {} + + template + buffer_info(const T *ptr, ssize_t size, bool readonly = true) + : buffer_info( + const_cast(ptr), sizeof(T), format_descriptor::format(), size, readonly) {} + + explicit buffer_info(Py_buffer *view, bool ownview = true) + : buffer_info( + view->buf, + view->itemsize, + view->format, + view->ndim, + {view->shape, view->shape + view->ndim}, + /* Though buffer::request() requests PyBUF_STRIDES, ctypes objects + * ignore this flag and return a view with NULL strides. + * When strides are NULL, build them manually. */ + view->strides + ? std::vector(view->strides, view->strides + view->ndim) + : detail::c_strides({view->shape, view->shape + view->ndim}, view->itemsize), + (view->readonly != 0)) { + // NOLINTNEXTLINE(cppcoreguidelines-prefer-member-initializer) + this->m_view = view; + // NOLINTNEXTLINE(cppcoreguidelines-prefer-member-initializer) + this->ownview = ownview; + } + + buffer_info(const buffer_info &) = delete; + buffer_info &operator=(const buffer_info &) = delete; + + buffer_info(buffer_info &&other) noexcept { (*this) = std::move(other); } + + buffer_info &operator=(buffer_info &&rhs) noexcept { + ptr = rhs.ptr; + itemsize = rhs.itemsize; + size = rhs.size; + format = std::move(rhs.format); + ndim = rhs.ndim; + shape = std::move(rhs.shape); + strides = std::move(rhs.strides); + std::swap(m_view, rhs.m_view); + std::swap(ownview, rhs.ownview); + readonly = rhs.readonly; + return *this; + } + + ~buffer_info() { + if (m_view && ownview) { + PyBuffer_Release(m_view); + delete m_view; + } + } + + Py_buffer *view() const { return m_view; } + Py_buffer *&view() { return m_view; } + +private: + struct private_ctr_tag {}; + + buffer_info(private_ctr_tag, + void *ptr, + ssize_t itemsize, + const std::string &format, + ssize_t ndim, + detail::any_container &&shape_in, + detail::any_container &&strides_in, + bool readonly) + : buffer_info( + ptr, itemsize, format, ndim, std::move(shape_in), std::move(strides_in), readonly) {} + + Py_buffer *m_view = nullptr; + bool ownview = false; +}; + +PYBIND11_NAMESPACE_BEGIN(detail) + +template +struct compare_buffer_info { + static bool compare(const buffer_info &b) { + return b.format == format_descriptor::format() && b.itemsize == (ssize_t) sizeof(T); + } +}; + +template +struct compare_buffer_info::value>> { + static bool compare(const buffer_info &b) { + return (size_t) b.itemsize == sizeof(T) + && (b.format == format_descriptor::value + || ((sizeof(T) == sizeof(long)) + && b.format == (std::is_unsigned::value ? "L" : "l")) + || ((sizeof(T) == sizeof(size_t)) + && b.format == (std::is_unsigned::value ? 
"N" : "n"))); + } +}; + +PYBIND11_NAMESPACE_END(detail) +PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE) diff --git a/third_party/pybind11/include/pybind11/cast.h b/third_party/pybind11/include/pybind11/cast.h new file mode 100644 index 0000000000..9a971704e4 --- /dev/null +++ b/third_party/pybind11/include/pybind11/cast.h @@ -0,0 +1,1665 @@ +/* + pybind11/cast.h: Partial template specializations to cast between + C++ and Python types + + Copyright (c) 2016 Wenzel Jakob + + All rights reserved. Use of this source code is governed by a + BSD-style license that can be found in the LICENSE file. +*/ + +#pragma once + +#include "detail/common.h" +#include "detail/descr.h" +#include "detail/type_caster_base.h" +#include "detail/typeid.h" +#include "pytypes.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE) +PYBIND11_NAMESPACE_BEGIN(detail) + +template +class type_caster : public type_caster_base {}; +template +using make_caster = type_caster>; + +// Shortcut for calling a caster's `cast_op_type` cast operator for casting a type_caster to a T +template +typename make_caster::template cast_op_type cast_op(make_caster &caster) { + return caster.operator typename make_caster::template cast_op_type(); +} +template +typename make_caster::template cast_op_type::type> +cast_op(make_caster &&caster) { + return std::move(caster).operator typename make_caster:: + template cast_op_type::type>(); +} + +template +class type_caster> { +private: + using caster_t = make_caster; + caster_t subcaster; + using reference_t = type &; + using subcaster_cast_op_type = typename caster_t::template cast_op_type; + + static_assert( + std::is_same::type &, subcaster_cast_op_type>::value + || std::is_same::value, + "std::reference_wrapper caster requires T to have a caster with an " + "`operator T &()` or `operator const T &()`"); + +public: + bool load(handle src, bool convert) { return subcaster.load(src, convert); } + static constexpr auto name = caster_t::name; + static handle + cast(const std::reference_wrapper &src, return_value_policy policy, handle parent) { + // It is definitely wrong to take ownership of this pointer, so mask that rvp + if (policy == return_value_policy::take_ownership + || policy == return_value_policy::automatic) { + policy = return_value_policy::automatic_reference; + } + return caster_t::cast(&src.get(), policy, parent); + } + template + using cast_op_type = std::reference_wrapper; + explicit operator std::reference_wrapper() { return cast_op(subcaster); } +}; + +#define PYBIND11_TYPE_CASTER(type, py_name) \ +protected: \ + type value; \ + \ +public: \ + static constexpr auto name = py_name; \ + template >::value, \ + int> = 0> \ + static ::pybind11::handle cast( \ + T_ *src, ::pybind11::return_value_policy policy, ::pybind11::handle parent) { \ + if (!src) \ + return ::pybind11::none().release(); \ + if (policy == ::pybind11::return_value_policy::take_ownership) { \ + auto h = cast(std::move(*src), policy, parent); \ + delete src; \ + return h; \ + } \ + return cast(*src, policy, parent); \ + } \ + operator type *() { return &value; } /* NOLINT(bugprone-macro-parentheses) */ \ + operator type &() { return value; } /* NOLINT(bugprone-macro-parentheses) */ \ + operator type &&() && { return std::move(value); } /* NOLINT(bugprone-macro-parentheses) */ \ + template \ + using cast_op_type = ::pybind11::detail::movable_cast_op_type + +template +using is_std_char_type = any_of, /* std::string 
+
+template <typename CharT>
+using is_std_char_type = any_of<std::is_same<CharT, char>, /* std::string */
+#if defined(PYBIND11_HAS_U8STRING)
+                                std::is_same<CharT, char8_t>, /* std::u8string */
+#endif
+                                std::is_same<CharT, char16_t>, /* std::u16string */
+                                std::is_same<CharT, char32_t>, /* std::u32string */
+                                std::is_same<CharT, wchar_t> /* std::wstring */
+                                >;
+
+template <typename T>
+struct type_caster<T, enable_if_t<std::is_arithmetic<T>::value && !is_std_char_type<T>::value>> {
+    using _py_type_0 = conditional_t<sizeof(T) <= sizeof(long), long, long long>;
+    using _py_type_1 = conditional_t<std::is_signed<T>::value,
+                                     _py_type_0,
+                                     typename std::make_unsigned<_py_type_0>::type>;
+    using py_type = conditional_t<std::is_floating_point<T>::value, double, _py_type_1>;
+
+public:
+    bool load(handle src, bool convert) {
+        py_type py_value;
+
+        if (!src) {
+            return false;
+        }
+
+#if !defined(PYPY_VERSION)
+        auto index_check = [](PyObject *o) { return PyIndex_Check(o); };
+#else
+        // In PyPy 7.3.3, `PyIndex_Check` is implemented by calling `__index__`,
+        // while CPython only considers the existence of `nb_index`/`__index__`.
+        auto index_check = [](PyObject *o) { return hasattr(o, "__index__"); };
+#endif
+
+        if (std::is_floating_point<T>::value) {
+            if (convert || PyFloat_Check(src.ptr())) {
+                py_value = (py_type) PyFloat_AsDouble(src.ptr());
+            } else {
+                return false;
+            }
+        } else if (PyFloat_Check(src.ptr())
+                   || (!convert && !PYBIND11_LONG_CHECK(src.ptr()) && !index_check(src.ptr()))) {
+            return false;
+        } else {
+            handle src_or_index = src;
+            // PyPy: 7.3.7's 3.8 does not implement PyLong_*'s __index__ calls.
+#if PY_VERSION_HEX < 0x03080000 || defined(PYPY_VERSION)
+            object index;
+            if (!PYBIND11_LONG_CHECK(src.ptr())) { // So: index_check(src.ptr())
+                index = reinterpret_steal<object>(PyNumber_Index(src.ptr()));
+                if (!index) {
+                    PyErr_Clear();
+                    if (!convert)
+                        return false;
+                } else {
+                    src_or_index = index;
+                }
+            }
+#endif
+            if (std::is_unsigned<py_type>::value) {
+                py_value = as_unsigned<py_type>(src_or_index.ptr());
+            } else { // signed integer:
+                py_value = sizeof(T) <= sizeof(long)
+                               ? (py_type) PyLong_AsLong(src_or_index.ptr())
+                               : (py_type) PYBIND11_LONG_AS_LONGLONG(src_or_index.ptr());
+            }
+        }
+
+        // Python API reported an error
+        bool py_err = py_value == (py_type) -1 && PyErr_Occurred();
+
+        // Check to see if the conversion is valid (integers should match exactly)
+        // Signed/unsigned checks happen elsewhere
+        if (py_err
+            || (std::is_integral<T>::value && sizeof(py_type) != sizeof(T)
+                && py_value != (py_type) (T) py_value)) {
+            PyErr_Clear();
+            if (py_err && convert && (PyNumber_Check(src.ptr()) != 0)) {
+                auto tmp = reinterpret_steal<object>(std::is_floating_point<T>::value
+                                                         ? PyNumber_Float(src.ptr())
+                                                         : PyNumber_Long(src.ptr()));
+                PyErr_Clear();
+                return load(tmp, false);
+            }
+            return false;
+        }
+
+        value = (T) py_value;
+        return true;
+    }
+
+    template <typename U = T>
+    static typename std::enable_if<std::is_floating_point<U>::value, handle>::type
+    cast(U src, return_value_policy /* policy */, handle /* parent */) {
+        return PyFloat_FromDouble((double) src);
+    }
+
+    template <typename U = T>
+    static typename std::enable_if<!std::is_floating_point<U>::value && std::is_signed<U>::value
+                                       && (sizeof(U) <= sizeof(long)),
+                                   handle>::type
+    cast(U src, return_value_policy /* policy */, handle /* parent */) {
+        return PYBIND11_LONG_FROM_SIGNED((long) src);
+    }
+
+    template <typename U = T>
+    static typename std::enable_if<!std::is_floating_point<U>::value && std::is_unsigned<U>::value
+                                       && (sizeof(U) <= sizeof(unsigned long)),
+                                   handle>::type
+    cast(U src, return_value_policy /* policy */, handle /* parent */) {
+        return PYBIND11_LONG_FROM_UNSIGNED((unsigned long) src);
+    }
+
+    template <typename U = T>
+    static typename std::enable_if<!std::is_floating_point<U>::value && std::is_signed<U>::value
+                                       && (sizeof(U) > sizeof(long)),
+                                   handle>::type
+    cast(U src, return_value_policy /* policy */, handle /* parent */) {
+        return PyLong_FromLongLong((long long) src);
+    }
+
+    template <typename U = T>
+    static typename std::enable_if<!std::is_floating_point<U>::value && std::is_unsigned<U>::value
+                                       && (sizeof(U) > sizeof(unsigned long)),
+                                   handle>::type
+    cast(U src, return_value_policy /* policy */, handle /* parent */) {
+        return PyLong_FromUnsignedLongLong((unsigned long long) src);
+    }
+
+    PYBIND11_TYPE_CASTER(T, const_name<std::is_integral<T>::value>("int", "float"));
+};
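The `convert` flag threaded through `load()` above is what `py::arg(...).noconvert()` switches off at the binding level. A sketch of the observable difference for a floating-point parameter (binding names are illustrative):

    #include <pybind11/pybind11.h>
    namespace py = pybind11;

    PYBIND11_MODULE(example, m) {
        // Default path: load() runs with convert == true, falls back to
        // PyFloat_AsDouble, so g(3) (a Python int) is accepted.
        m.def("g", [](double x) { return x * 2; });
        // noconvert(): load() only runs with convert == false, which requires
        // PyFloat_Check to pass, so g_strict(3) raises TypeError.
        m.def("g_strict", [](double x) { return x * 2; }, py::arg("x").noconvert());
    }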
+
+template <typename T>
+struct void_caster {
+public:
+    bool load(handle src, bool) {
+        if (src && src.is_none()) {
+            return true;
+        }
+        return false;
+    }
+    static handle cast(T, return_value_policy /* policy */, handle /* parent */) {
+        return none().inc_ref();
+    }
+    PYBIND11_TYPE_CASTER(T, const_name("None"));
+};
+
+template <>
+class type_caster<void_type> : public void_caster<void_type> {};
+
+template <>
+class type_caster<void> : public type_caster<void_type> {
+public:
+    using type_caster<void_type>::cast;
+
+    bool load(handle h, bool) {
+        if (!h) {
+            return false;
+        }
+        if (h.is_none()) {
+            value = nullptr;
+            return true;
+        }
+
+        /* Check if this is a capsule */
+        if (isinstance<capsule>(h)) {
+            value = reinterpret_borrow<capsule>(h);
+            return true;
+        }
+
+        /* Check if this is a C++ type */
+        const auto &bases = all_type_info((PyTypeObject *) type::handle_of(h).ptr());
+        if (bases.size() == 1) { // Only allowing loading from a single-value type
+            value = values_and_holders(reinterpret_cast<instance *>(h.ptr())).begin()->value_ptr();
+            return true;
+        }
+
+        /* Fail */
+        return false;
+    }
+
+    static handle cast(const void *ptr, return_value_policy /* policy */, handle /* parent */) {
+        if (ptr) {
+            return capsule(ptr).release();
+        }
+        return none().inc_ref();
+    }
+
+    template <typename T>
+    using cast_op_type = void *&;
+    explicit operator void *&() { return value; }
+    static constexpr auto name = const_name("capsule");
+
+private:
+    void *value = nullptr;
+};
+
+template <>
+class type_caster<std::nullptr_t> : public void_caster<std::nullptr_t> {};
+
+template <>
+class type_caster<bool> {
+public:
+    bool load(handle src, bool convert) {
+        if (!src) {
+            return false;
+        }
+        if (src.ptr() == Py_True) {
+            value = true;
+            return true;
+        }
+        if (src.ptr() == Py_False) {
+            value = false;
+            return true;
+        }
+        if (convert || (std::strcmp("numpy.bool_", Py_TYPE(src.ptr())->tp_name) == 0)) {
+            // (allow non-implicit conversion for numpy booleans)
+
+            Py_ssize_t res = -1;
+            if (src.is_none()) {
+                res = 0; // None is implicitly converted to False
+            }
+#if defined(PYPY_VERSION)
+            // On PyPy, check that "__bool__" attr exists
+            else if (hasattr(src, PYBIND11_BOOL_ATTR)) {
+                res = PyObject_IsTrue(src.ptr());
+            }
+#else
+            // Alternate approach for CPython: this does the same as the above, but optimized
+            // using the CPython API so as to avoid an unneeded attribute lookup.
+            else if (auto *tp_as_number = src.ptr()->ob_type->tp_as_number) {
+                if (PYBIND11_NB_BOOL(tp_as_number)) {
+                    res = (*PYBIND11_NB_BOOL(tp_as_number))(src.ptr());
+                }
+            }
+#endif
+            if (res == 0 || res == 1) {
+                value = (res != 0);
+                return true;
+            }
+            PyErr_Clear();
+        }
+        return false;
+    }
+    static handle cast(bool src, return_value_policy /* policy */, handle /* parent */) {
+        return handle(src ? Py_True : Py_False).inc_ref();
+    }
+    PYBIND11_TYPE_CASTER(bool, const_name("bool"));
+};
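Apart from exact `Py_True`/`Py_False` matches, the bool caster above maps `None` to `False` and consults `__bool__`/`nb_bool` only when conversion is enabled (`numpy.bool_` is special-cased and accepted either way). A sketch with illustrative binding names:

    #include <pybind11/pybind11.h>
    namespace py = pybind11;

    PYBIND11_MODULE(example, m) {
        // check(None) == False; objects whose __bool__ yields 0/1 are accepted.
        m.def("check", [](bool b) { return b; });
        // With noconvert(), check_strict(None) raises TypeError instead.
        m.def("check_strict", [](bool b) { return b; }, py::arg("b").noconvert());
    }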
+
+// Helper class for UTF-{8,16,32} C++ stl strings:
+template <typename StringType, bool IsView = false>
+struct string_caster {
+    using CharT = typename StringType::value_type;
+
+    // Simplify life by being able to assume standard char sizes (the standard only guarantees
+    // minimums, but Python requires exact sizes)
+    static_assert(!std::is_same<CharT, char>::value || sizeof(CharT) == 1,
+                  "Unsupported char size != 1");
+#if defined(PYBIND11_HAS_U8STRING)
+    static_assert(!std::is_same<CharT, char8_t>::value || sizeof(CharT) == 1,
+                  "Unsupported char8_t size != 1");
+#endif
+    static_assert(!std::is_same<CharT, char16_t>::value || sizeof(CharT) == 2,
+                  "Unsupported char16_t size != 2");
+    static_assert(!std::is_same<CharT, char32_t>::value || sizeof(CharT) == 4,
+                  "Unsupported char32_t size != 4");
+    // wchar_t can be either 16 bits (Windows) or 32 (everywhere else)
+    static_assert(!std::is_same<CharT, wchar_t>::value || sizeof(CharT) == 2 || sizeof(CharT) == 4,
+                  "Unsupported wchar_t size != 2/4");
+    static constexpr size_t UTF_N = 8 * sizeof(CharT);
+
+    bool load(handle src, bool) {
+        handle load_src = src;
+        if (!src) {
+            return false;
+        }
+        if (!PyUnicode_Check(load_src.ptr())) {
+            return load_raw(load_src);
+        }
+
+        // For UTF-8 we avoid the need for a temporary `bytes` object by using
+        // `PyUnicode_AsUTF8AndSize`.
+        if (PYBIND11_SILENCE_MSVC_C4127(UTF_N == 8)) {
+            Py_ssize_t size = -1;
+            const auto *buffer
+                = reinterpret_cast<const CharT *>(PyUnicode_AsUTF8AndSize(load_src.ptr(), &size));
+            if (!buffer) {
+                PyErr_Clear();
+                return false;
+            }
+            value = StringType(buffer, static_cast<size_t>(size));
+            return true;
+        }
+
+        auto utfNbytes
+            = reinterpret_steal<object>(PyUnicode_AsEncodedString(load_src.ptr(),
+                                                                  UTF_N == 8    ? "utf-8"
+                                                                  : UTF_N == 16 ? "utf-16"
+                                                                                : "utf-32",
+                                                                  nullptr));
+        if (!utfNbytes) {
+            PyErr_Clear();
+            return false;
+        }
+
+        const auto *buffer
+            = reinterpret_cast<const CharT *>(PYBIND11_BYTES_AS_STRING(utfNbytes.ptr()));
+        size_t length = (size_t) PYBIND11_BYTES_SIZE(utfNbytes.ptr()) / sizeof(CharT);
+        // Skip BOM for UTF-16/32
+        if (PYBIND11_SILENCE_MSVC_C4127(UTF_N > 8)) {
+            buffer++;
+            length--;
+        }
+        value = StringType(buffer, length);
+
+        // If we're loading a string_view we need to keep the encoded Python object alive:
+        if (IsView) {
+            loader_life_support::add_patient(utfNbytes);
+        }
+
+        return true;
+    }
+
+    static handle
+    cast(const StringType &src, return_value_policy /* policy */, handle /* parent */) {
+        const char *buffer = reinterpret_cast<const char *>(src.data());
+        auto nbytes = ssize_t(src.size() * sizeof(CharT));
+        handle s = decode_utfN(buffer, nbytes);
+        if (!s) {
+            throw error_already_set();
+        }
+        return s;
+    }
+
+    PYBIND11_TYPE_CASTER(StringType, const_name(PYBIND11_STRING_NAME));
+
+private:
+    static handle decode_utfN(const char *buffer, ssize_t nbytes) {
+#if !defined(PYPY_VERSION)
+        return UTF_N == 8    ? PyUnicode_DecodeUTF8(buffer, nbytes, nullptr)
+               : UTF_N == 16 ? PyUnicode_DecodeUTF16(buffer, nbytes, nullptr, nullptr)
+                             : PyUnicode_DecodeUTF32(buffer, nbytes, nullptr, nullptr);
+#else
+        // PyPy segfaults when on PyUnicode_DecodeUTF16 (and possibly on PyUnicode_DecodeUTF32 as
+        // well), so bypass the whole thing by just passing the encoding as a string value, which
+        // works properly:
+        return PyUnicode_Decode(buffer,
+                                nbytes,
+                                UTF_N == 8    ? "utf-8"
+                                : UTF_N == 16 ? "utf-16"
+                                              : "utf-32",
+                                nullptr);
+#endif
+    }
+
+    // When loading into a std::string or char*, accept a bytes/bytearray object as-is (i.e.
+    // without any encoding/decoding attempt). For other C++ char sizes this is a no-op.
+    // which supports loading a unicode from a str, doesn't take this path.
+    template <typename C = CharT>
+    bool load_raw(enable_if_t<std::is_same<C, char>::value, handle> src) {
+        if (PYBIND11_BYTES_CHECK(src.ptr())) {
+            // We were passed raw bytes; accept it into a std::string or char*
+            // without any encoding attempt.
+            const char *bytes = PYBIND11_BYTES_AS_STRING(src.ptr());
+            if (!bytes) {
+                pybind11_fail("Unexpected PYBIND11_BYTES_AS_STRING() failure.");
+            }
+            value = StringType(bytes, (size_t) PYBIND11_BYTES_SIZE(src.ptr()));
+            return true;
+        }
+        if (PyByteArray_Check(src.ptr())) {
+            // We were passed a bytearray; accept it into a std::string or char*
+            // without any encoding attempt.
+            const char *bytearray = PyByteArray_AsString(src.ptr());
+            if (!bytearray) {
+                pybind11_fail("Unexpected PyByteArray_AsString() failure.");
+            }
+            value = StringType(bytearray, (size_t) PyByteArray_Size(src.ptr()));
+            return true;
+        }
+
+        return false;
+    }
+
+    template <typename C = CharT>
+    bool load_raw(enable_if_t<!std::is_same<C, char>::value, handle>) {
+        return false;
+    }
+};
+
+template <typename CharT, class Traits, class Allocator>
+struct type_caster<std::basic_string<CharT, Traits, Allocator>,
+                   enable_if_t<is_std_char_type<CharT>::value>>
+    : string_caster<std::basic_string<CharT, Traits, Allocator>> {};
+
+#ifdef PYBIND11_HAS_STRING_VIEW
+template <typename CharT, class Traits>
+struct type_caster<std::basic_string_view<CharT, Traits>,
+                   enable_if_t<is_std_char_type<CharT>::value>>
+    : string_caster<std::basic_string_view<CharT, Traits>, true> {};
+#endif
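In binding terms, `string_caster` above means a Python `str` is decoded (UTF-8 for `std::string`), while `bytes`/`bytearray` take the `load_raw` path and are copied in byte-for-byte. A sketch (binding name illustrative):

    #include <pybind11/pybind11.h>
    #include <string>
    namespace py = pybind11;

    PYBIND11_MODULE(example, m) {
        // length("héllo") == 6 (UTF-8 byte count), length(b"abc") == 3 (raw copy);
        // conversely, a returned std::string must hold valid UTF-8 or cast() raises.
        m.def("length", [](const std::string &s) { return s.size(); });
    }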
+
+// Type caster for C-style strings. We basically use a std::string type caster, but also add the
+// ability to use None as a nullptr char* (which the string caster doesn't allow).
+template <typename CharT>
+struct type_caster<CharT, enable_if_t<is_std_char_type<CharT>::value>> {
+    using StringType = std::basic_string<CharT>;
+    using StringCaster = make_caster<StringType>;
+    StringCaster str_caster;
+    bool none = false;
+    CharT one_char = 0;
+
+public:
+    bool load(handle src, bool convert) {
+        if (!src) {
+            return false;
+        }
+        if (src.is_none()) {
+            // Defer accepting None to other overloads (if we aren't in convert mode):
+            if (!convert) {
+                return false;
+            }
+            none = true;
+            return true;
+        }
+        return str_caster.load(src, convert);
+    }
+
+    static handle cast(const CharT *src, return_value_policy policy, handle parent) {
+        if (src == nullptr) {
+            return pybind11::none().inc_ref();
+        }
+        return StringCaster::cast(StringType(src), policy, parent);
+    }
+
+    static handle cast(CharT src, return_value_policy policy, handle parent) {
+        if (std::is_same<char, CharT>::value) {
+            handle s = PyUnicode_DecodeLatin1((const char *) &src, 1, nullptr);
+            if (!s) {
+                throw error_already_set();
+            }
+            return s;
+        }
+        return StringCaster::cast(StringType(1, src), policy, parent);
+    }
+    explicit operator CharT *() {
+        return none ? nullptr : const_cast<CharT *>(static_cast<StringType &>(str_caster).c_str());
+    }
+    explicit operator CharT &() {
+        if (none) {
+            throw value_error("Cannot convert None to a character");
+        }
+
+        auto &value = static_cast<StringType &>(str_caster);
+        size_t str_len = value.size();
+        if (str_len == 0) {
+            throw value_error("Cannot convert empty string to a character");
+        }
+
+        // If we're in UTF-8 mode, we have two possible failures: one for a unicode character that
+        // is too high, and one for multiple unicode characters (caught later), so we need to
+        // figure out how long the first encoded character is in bytes to distinguish between these
+        // two errors. We also allow want to allow unicode characters U+0080 through U+00FF, as
+        // those can fit into a single char value.
+        if (PYBIND11_SILENCE_MSVC_C4127(StringCaster::UTF_N == 8) && str_len > 1 && str_len <= 4) {
+            auto v0 = static_cast<unsigned char>(value[0]);
+            // low bits only: 0-127
+            // 0b110xxxxx - start of 2-byte sequence
+            // 0b1110xxxx - start of 3-byte sequence
+            // 0b11110xxx - start of 4-byte sequence
+            size_t char0_bytes = (v0 & 0x80) == 0      ? 1
+                                 : (v0 & 0xE0) == 0xC0 ? 2
+                                 : (v0 & 0xF0) == 0xE0 ? 3
+                                                       : 4;
+
+            if (char0_bytes == str_len) {
+                // If we have a 128-255 value, we can decode it into a single char:
+                if (char0_bytes == 2 && (v0 & 0xFC) == 0xC0) { // 0x110000xx 0x10xxxxxx
+                    one_char = static_cast<CharT>(((v0 & 3) << 6)
+                                                  + (static_cast<unsigned char>(value[1]) & 0x3F));
+                    return one_char;
+                }
+                // Otherwise we have a single character, but it's > U+00FF
+                throw value_error("Character code point not in range(0x100)");
+            }
+        }
+
+        // UTF-16 is much easier: we can only have a surrogate pair for values above U+FFFF, thus a
+        // surrogate pair with total length 2 instantly indicates a range error (but not a "your
+        // string was too long" error).
+        else if (PYBIND11_SILENCE_MSVC_C4127(StringCaster::UTF_N == 16) && str_len == 2) {
+            one_char = static_cast<CharT>(value[0]);
+            if (one_char >= 0xD800 && one_char < 0xE000) {
+                throw value_error("Character code point not in range(0x10000)");
+            }
+        }
+
+        if (str_len != 1) {
+            throw value_error("Expected a character, but multi-character string found");
+        }
+
+        one_char = value[0];
+        return one_char;
+    }
+
+    static constexpr auto name = const_name(PYBIND11_STRING_NAME);
+    template <typename _T>
+    using cast_op_type = pybind11::detail::cast_op_type<_T>;
+};
+
+// Base implementation for std::tuple and std::pair
+template