pd: add CPP inference with LAMMPS (#4467)
1. Support LAMMPS inference with the Paddle backend on CUDA and DCU devices.
2. Fix `deepmd/pd/utils/serialization.py` so that Paddle inference files can
   be deserialized using `dp convert-backend deeppot_sea.yaml
   deeppot_sea.json` (see the sketch below).
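
As a quick sanity check that a converted file loads, a minimal sketch (assuming a local Paddle installation new enough to load PIR programs; `paddle.jit.load` takes the path prefix, i.e. the file name without the `.json` suffix, matching how `deserialize_to_file` saves via `paddle.jit.save`):

```python
import paddle

# Load the converted program by its path prefix ("deeppot_sea", not
# "deeppot_sea.json"), then switch to inference mode.
model = paddle.jit.load("deeppot_sea")
model.eval()
```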

Related PR:
1. PaddlePaddle/Paddle#70545
<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit

Based on the comprehensive changes, here are the updated release notes:

- **New Features**
	- Added support for PaddlePaddle deep learning framework.
	- Introduced new Paddle-based deep potential model computations.
	- Expanded backend support for model inference with Paddle (a usage sketch follows these notes).

- **Configuration**
	- New CMake option `ENABLE_PADDLE` to toggle Paddle support.
	- Added configuration parameters for the Paddle version and inference directory.

- **Testing**
	- Comprehensive test suites added for Paddle backend.
	- Enhanced LAMMPS integration tests with Paddle support.

- **Documentation**
	- Updated version header and configuration files to reflect Paddle integration.

- **Performance**
	- Added JIT compilation for Paddle model methods.
	- Optimized model serialization and deserialization.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
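
For reference, Python-side inference through the existing `DeepPot` API can exercise the Paddle backend; a hedged sketch (assuming `deepmd.infer.DeepPot` accepts the converted `.json` model, with a six-atom system matching the output shapes documented in the serialization diff below):

```python
import numpy as np

from deepmd.infer import DeepPot

# The backend is selected from the model file suffix; ".json" here is the
# converted Paddle format produced by `dp convert-backend`.
dp = DeepPot("deeppot_sea.json")

coords = np.random.rand(1, 6 * 3)       # one frame, six atoms, flattened xyz
cells = 10.0 * np.eye(3).reshape(1, 9)  # cubic box
atom_types = [0, 1, 1, 0, 1, 1]         # two waters: O H H O H H

e, f, v = dp.eval(coords, cells, atom_types)
print(e.shape, f.shape, v.shape)        # (1, 1), (1, 6, 3), (1, 9)
```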

---------

Signed-off-by: HydrogenSulfate <[email protected]>
Signed-off-by: Jinzhe Zeng <[email protected]>
Co-authored-by: Jinzhe Zeng <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
3 people authored Jan 13, 2025
1 parent 65ca05a commit b7effe5
Showing 24 changed files with 2,582 additions and 13 deletions.
1 change: 1 addition & 0 deletions .devcontainer/build_cxx.sh
@@ -11,6 +11,7 @@ mkdir -p ${SCRIPT_PATH}/../buildcxx/
cd ${SCRIPT_PATH}/../buildcxx/
cmake -D ENABLE_TENSORFLOW=ON \
-D ENABLE_PYTORCH=ON \
-D ENABLE_PADDLE=ON \
-D CMAKE_INSTALL_PREFIX=${SCRIPT_PATH}/../dp/ \
-D LAMMPS_VERSION=stable_29Aug2024_update1 \
-D CMAKE_BUILD_TYPE=Debug \
1 change: 1 addition & 0 deletions .github/workflows/suppr.txt
@@ -0,0 +1 @@
leak:libpaddle_inference
15 changes: 12 additions & 3 deletions .github/workflows/test_cc.yml
@@ -52,8 +52,13 @@ jobs:
LMP_CXX11_ABI_0: 1
CMAKE_GENERATOR: Ninja
CXXFLAGS: ${{ matrix.check_memleak && '-fsanitize=leak' || '' }}
LSAN_OPTIONS: suppressions=${{ github.workspace }}/.github/workflows/suppr.txt
# test lammps
- run: pytest --cov=deepmd source/lmp/tests
- run: |
cp ${{ github.workspace }}/source/build_tests/paddle_inference_install_dir/paddle/lib/*.so ${{ github.workspace }}/dp_test/lib/
cp ${{ github.workspace }}/source/build_tests/paddle_inference_install_dir/third_party/install/onednn/lib/* ${{ github.workspace }}/dp_test/lib/
cp ${{ github.workspace }}/source/build_tests/paddle_inference_install_dir/third_party/install/mklml/lib/* ${{ github.workspace }}/dp_test/lib/
pytest --cov=deepmd source/lmp/tests
env:
OMP_NUM_THREADS: 1
TF_INTRA_OP_PARALLELISM_THREADS: 1
@@ -62,12 +67,16 @@
LD_LIBRARY_PATH: ${{ github.workspace }}/dp_test/lib:${{ github.workspace }}/libtorch/lib
if: ${{ !matrix.check_memleak }}
# test ipi
- run: pytest --cov=deepmd source/ipi/tests
- run: |
export PATH=${{ github.workspace }}/dp_test/bin:$PATH
cp ${{ github.workspace }}/source/build_tests/paddle_inference_install_dir/paddle/lib/*.so ${{ github.workspace }}/dp_test/lib/
cp ${{ github.workspace }}/source/build_tests/paddle_inference_install_dir/third_party/install/onednn/lib/* ${{ github.workspace }}/dp_test/lib/
cp ${{ github.workspace }}/source/build_tests/paddle_inference_install_dir/third_party/install/mklml/lib/* ${{ github.workspace }}/dp_test/lib/
pytest --cov=deepmd source/ipi/tests
env:
OMP_NUM_THREADS: 1
TF_INTRA_OP_PARALLELISM_THREADS: 1
TF_INTER_OP_PARALLELISM_THREADS: 1
PATH: ${{ github.workspace }}/dp_test/bin:$PATH
LD_LIBRARY_PATH: ${{ github.workspace }}/dp_test/lib:${{ github.workspace }}/libtorch/lib
if: ${{ !matrix.check_memleak }}
- uses: codecov/codecov-action@v5
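The copy steps above stage `libpaddle_inference.so` and its oneDNN/MKLML dependencies into `dp_test/lib`, which is already on `LD_LIBRARY_PATH`, so the LAMMPS and i-PI test binaries can resolve them at runtime. A small sketch for checking that resolution locally (standard library only; run it under the same `LD_LIBRARY_PATH` as the tests):

```python
import ctypes

# CDLL raises OSError if the dynamic loader cannot find the library or
# any of its transitive dependencies via LD_LIBRARY_PATH / RPATH.
ctypes.CDLL("libpaddle_inference.so")
print("libpaddle_inference.so resolved")
```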
3 changes: 3 additions & 0 deletions .github/workflows/test_cuda.yml
@@ -85,6 +85,9 @@ jobs:
- run: |
export LD_LIBRARY_PATH=$CUDA_PATH/lib64:/usr/lib/x86_64-linux-gnu/:$GITHUB_WORKSPACE/dp_test/lib:$GITHUB_WORKSPACE/libtorch/lib:$LD_LIBRARY_PATH
export PATH=$GITHUB_WORKSPACE/dp_test/bin:$PATH
cp $GITHUB_WORKSPACE/source/build_tests/paddle_inference_install_dir/paddle/lib/* $GITHUB_WORKSPACE/dp_test/lib/
cp $GITHUB_WORKSPACE/source/build_tests/paddle_inference_install_dir/third_party/install/onednn/lib/* $GITHUB_WORKSPACE/dp_test/lib/
cp $GITHUB_WORKSPACE/source/build_tests/paddle_inference_install_dir/third_party/install/mklml/lib/* $GITHUB_WORKSPACE/dp_test/lib/
python -m pytest -s source/lmp/tests || (cat log.lammps && exit 1)
python -m pytest source/ipi/tests
env:
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
@@ -7,7 +7,7 @@ repos:
- id: trailing-whitespace
exclude: "^.+\\.pbtxt$"
- id: end-of-file-fixer
exclude: "^.+\\.pbtxt$"
exclude: "^.+\\.pbtxt$|deeppot_sea.*\\.json$"
- id: check-yaml
- id: check-json
- id: check-added-large-files
@@ -63,7 +63,7 @@ repos:
rev: v19.1.6
hooks:
- id: clang-format
exclude: ^(source/3rdparty|source/lib/src/gpu/cudart/.+\.inc|.+\.ipynb$)
exclude: ^(source/3rdparty|source/lib/src/gpu/cudart/.+\.inc|.+\.ipynb$|.+\.json$)
# markdown, yaml, CSS, javascript
- repo: https://github.com/pre-commit/mirrors-prettier
rev: v4.0.0-alpha.8
59 changes: 54 additions & 5 deletions deepmd/pd/utils/serialization.py
@@ -1,5 +1,4 @@
# SPDX-License-Identifier: LGPL-3.0-or-later
import json

import paddle

@@ -34,21 +33,71 @@ def deserialize_to_file(model_file: str, data: dict) -> None:
data : dict
The dictionary to be deserialized.
"""
paddle.framework.core._set_prim_all_enabled(True)
if not model_file.endswith(".json"):
raise ValueError("Paddle backend only supports converting .json file")
model = BaseModel.deserialize(data["model"])
model: paddle.nn.Layer = BaseModel.deserialize(data["model"])
model.eval()
# JIT will be happy this way...
model.model_def_script = json.dumps(data["model_def_script"])
if "min_nbor_dist" in data.get("@variables", {}):
model.min_nbor_dist = float(data["@variables"]["min_nbor_dist"])
# model = paddle.jit.to_static(model)
model.register_buffer(
"buffer_min_nbor_dist",
paddle.to_tensor(
float(data["@variables"]["min_nbor_dist"]),
),
)
paddle.set_flags(
{
"FLAGS_save_cf_stack_op": 1,
"FLAGS_prim_enable_dynamic": 1,
"FLAGS_enable_pir_api": 1,
}
)
from paddle.static import (
InputSpec,
)

""" example output shape and dtype of forward
atom_energy: fetch_name_0 (1, 6, 1) float64
atom_virial: fetch_name_1 (1, 6, 1, 9) float64
energy: fetch_name_2 (1, 1) float64
force: fetch_name_3 (1, 6, 3) float64
mask: fetch_name_4 (1, 6) int32
virial: fetch_name_5 (1, 9) float64
"""
model.forward = paddle.jit.to_static(
model.forward,
full_graph=True,
input_spec=[
InputSpec([1, -1, 3], dtype="float64", name="coord"),
InputSpec([1, -1], dtype="int64", name="atype"),
InputSpec([1, 9], dtype="float64", name="box"),
None,
None,
True,
],
)
""" example output shape and dtype of forward_lower
fetch_name_0: atom_energy [1, 192, 1] paddle.float64
fetch_name_1: energy [1, 1] paddle.float64
fetch_name_2: extended_force [1, 5184, 3] paddle.float64
fetch_name_3: extended_virial [1, 5184, 1, 9] paddle.float64
fetch_name_4: virial [1, 9] paddle.float64
"""
model.forward_lower = paddle.jit.to_static(
model.forward_lower,
full_graph=True,
input_spec=[
InputSpec([1, -1, 3], dtype="float64", name="coord"),
InputSpec([1, -1], dtype="int32", name="atype"),
InputSpec([1, -1, -1], dtype="int32", name="nlist"),
None,
None,
None,
True,
None,
],
)
paddle.jit.save(
model,
model_file.split(".json")[0],
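In the `InputSpec` shapes above, `-1` marks a dynamic dimension (the number of atoms or neighbors), so a single exported program serves systems of any size. A standalone sketch of the same mechanism, independent of DeePMD and using only public Paddle APIs (the `Doubler` layer is hypothetical, for illustration only):

```python
import paddle
from paddle.static import InputSpec

class Doubler(paddle.nn.Layer):
    def forward(self, coord):
        return coord * 2.0

layer = Doubler()
# -1 leaves the atom dimension dynamic, mirroring the export above.
layer.forward = paddle.jit.to_static(
    layer.forward,
    full_graph=True,
    input_spec=[InputSpec([1, -1, 3], dtype="float64", name="coord")],
)
# The same traced function now accepts 4 or 7 "atoms".
print(layer.forward(paddle.rand([1, 4, 3], dtype="float64")).shape)
print(layer.forward(paddle.rand([1, 7, 3], dtype="float64")).shape)
```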
132 changes: 132 additions & 0 deletions source/CMakeLists.txt
@@ -9,6 +9,7 @@ if(ENABLE_TENSORFLOW)
# JAX requires TF C interface, contained in TF C++ library
set(ENABLE_JAX ON)
endif()
option(ENABLE_PADDLE "Enable Paddle interface" OFF)
option(BUILD_TESTING "Build test and enable coverage" OFF)
set(DEEPMD_C_ROOT
""
@@ -26,6 +27,133 @@ if(NOT DEEPMD_C_ROOT)
set_if_higher(CMAKE_CXX_STANDARD 14)
endif()

if(ENABLE_PADDLE)
if(NOT DEFINED PADDLE_INFERENCE_DIR)
# message(FATAL_ERROR "Make sure PADDLE_INFERENCE_DIR is set when
# ENABLE_PADDLE=ON")
if(USE_CUDA_TOOLKIT)
find_package(CUDAToolkit REQUIRED)
string(REGEX MATCH "^[0-9]+" CUDA_MAJOR_VERSION "${CUDAToolkit_VERSION}")
message(STATUS "Find CUDAToolkit_VERSION: ${CUDAToolkit_VERSION}")
if(CUDA_MAJOR_VERSION VERSION_EQUAL "11")
message(
STATUS
"PADDLE_INFERENCE_DIR is not defined, downloading CUDA11.8 infernece lib to: ${CMAKE_BINARY_DIR}/"
)
set(DOWNLOAD_URL
"https://paddle-qa.bj.bcebos.com/paddle-pipeline/GITHUB_Docker_Compile_Test_Cuda118_cudnn860_Trt8531_D1/latest/paddle_inference.tgz"
)
elseif(CUDA_MAJOR_VERSION VERSION_EQUAL "12")
message(
STATUS
"PADDLE_INFERENCE_DIR is not defined, downloading CUDA12.3 infernece lib to: ${CMAKE_BINARY_DIR}/"
)
set(DOWNLOAD_URL
"https://paddle-qa.bj.bcebos.com/paddle-pipeline/GITHUB_Docker_Compile_Test_Cuda123_cudnn900_Trt8616_D1/latest/paddle_inference.tgz"
)
else()
message(
FATAL_ERROR
"Paddle inference lib only support cuda 11 or 12, but your CUDA_MAJOR_VERSION is: ${CUDA_MAJOR_VERSION}"
)
endif()
else()
message(
STATUS
"PADDLE_INFERENCE_DIR is not defined, downloading CPU infernece lib to: ${CMAKE_BINARY_DIR}/"
)
set(DOWNLOAD_URL
"https://paddle-qa.bj.bcebos.com/paddle-pipeline/GITHUB_Docker_Compile_Test_Cpu_Mkl_Avx_D1/latest/paddle_inference.tgz"
)
endif()
set(TGZ_FILE "${CMAKE_BINARY_DIR}/paddle_inference.tgz")
set(EXTRACTED_DIR "${CMAKE_BINARY_DIR}/paddle_inference_install_dir")
file(DOWNLOAD ${DOWNLOAD_URL} ${TGZ_FILE})
message(STATUS "Downloading finished, extracting...")
execute_process(COMMAND ${CMAKE_COMMAND} -E tar -xzvf ${TGZ_FILE}
OUTPUT_QUIET)
file(REMOVE ${TGZ_FILE})
set(PADDLE_INFERENCE_DIR
${EXTRACTED_DIR}
CACHE PATH
"Path to 'paddle_inference_install_dir' or 'paddle_inference'")
else()
message(
STATUS "PADDLE_INFERENCE_DIR is already defined: ${PADDLE_INFERENCE_DIR}")
endif()

link_directories(
${PADDLE_INFERENCE_DIR}/paddle/lib
${PADDLE_INFERENCE_DIR}/third_party/install/onednn/lib
${PADDLE_INFERENCE_DIR}/third_party/install/mklml/lib)

# Check and append to existing CMAKE_BUILD_RPATH
if(DEFINED CMAKE_BUILD_RPATH)
list(APPEND CMAKE_BUILD_RPATH "${PADDLE_INFERENCE_DIR}/paddle/lib"
"${PADDLE_INFERENCE_DIR}/third_party/install/onednn/lib"
"${PADDLE_INFERENCE_DIR}/third_party/install/mklml/lib")
else()
set(CMAKE_BUILD_RPATH
"${PADDLE_INFERENCE_DIR}/paddle/lib"
"${PADDLE_INFERENCE_DIR}/third_party/install/onednn/lib"
"${PADDLE_INFERENCE_DIR}/third_party/install/mklml/lib")
endif()

# Check and append to existing CMAKE_INSTALL_RPATH
if(DEFINED CMAKE_INSTALL_RPATH)
list(APPEND CMAKE_INSTALL_RPATH "${PADDLE_INFERENCE_DIR}/paddle/lib"
"${PADDLE_INFERENCE_DIR}/third_party/install/onednn/lib"
"${PADDLE_INFERENCE_DIR}/third_party/install/mklml/lib")
else()
set(CMAKE_INSTALL_RPATH
"${PADDLE_INFERENCE_DIR}/paddle/lib"
"${PADDLE_INFERENCE_DIR}/third_party/install/onednn/lib"
"${PADDLE_INFERENCE_DIR}/third_party/install/mklml/lib")
endif()

set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
message(STATUS "Final PADDLE_INFERENCE_DIR is set to ${PADDLE_INFERENCE_DIR}")

set(PADDLE_INFERENCE_DIR
${PADDLE_INFERENCE_DIR}
CACHE PATH "Path to 'paddle_inference_install_dir' or 'paddle_inference'")

# used in api_cc
set(PADDLE_LIBRARIES
"${PADDLE_INFERENCE_DIR}/paddle/lib/libpaddle_inference.so"
CACHE PATH "Path to libpaddle_inference.so")

include_directories("${PADDLE_INFERENCE_DIR}/")
set(PADDLE_LIB_THIRD_PARTY_PATH
"${PADDLE_INFERENCE_DIR}/third_party/install/")

include_directories("${PADDLE_LIB_THIRD_PARTY_PATH}protobuf/include")
include_directories("${PADDLE_LIB_THIRD_PARTY_PATH}glog/include")
include_directories("${PADDLE_LIB_THIRD_PARTY_PATH}gflags/include")
include_directories("${PADDLE_LIB_THIRD_PARTY_PATH}xxhash/include")
list(APPEND BACKEND_INCLUDE_DIRS "${PADDLE_INFERENCE_DIR}/paddle/include")
list(APPEND BACKEND_INCLUDE_DIRS
"${PADDLE_LIB_THIRD_PARTY_PATH}protobuf/include")
list(APPEND BACKEND_INCLUDE_DIRS "${PADDLE_LIB_THIRD_PARTY_PATH}glog/include")
list(APPEND BACKEND_INCLUDE_DIRS
"${PADDLE_LIB_THIRD_PARTY_PATH}gflags/include")
list(APPEND BACKEND_INCLUDE_DIRS
"${PADDLE_LIB_THIRD_PARTY_PATH}xxhash/include")

link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}protobuf/lib")
link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}glog/lib")
link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}gflags/lib")
link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}xxhash/lib")
link_directories("${PADDLE_INFERENCE_DIR}/paddle/lib")
list(APPEND BACKEND_LIBRARY_PATH "${PADDLE_INFERENCE_DIR}/paddle/lib")
list(APPEND BACKEND_LIBRARY_PATH
"${PADDLE_INFERENCE_DIR}/third_party/install/onednn/lib")
list(APPEND BACKEND_LIBRARY_PATH
"${PADDLE_INFERENCE_DIR}/third_party/install/mklml/lib")

# if (USE_ROCM_TOOLKIT) add_definitions(-D_GLIBCXX_USE_CXX11_ABI=1) endif()
endif(ENABLE_PADDLE)

if(BUILD_TESTING)
enable_testing()
add_subdirectory(${CMAKE_SOURCE_DIR}/cmake/coverage_config coverage_config)
@@ -279,9 +407,13 @@ if(NOT DEEPMD_C_ROOT)
if(ENABLE_JAX)
message(STATUS "- JAX")
endif()
if(ENABLE_PADDLE)
message(STATUS "- Paddle")
endif()
if(NOT ENABLE_TENSORFLOW
AND NOT ENABLE_PYTORCH
AND NOT ENABLE_JAX
AND NOT ENABLE_PADDLE
AND NOT BUILD_PY_IF)
message(FATAL_ERROR "No backend is enabled.")
endif()
8 changes: 8 additions & 0 deletions source/api_cc/CMakeLists.txt
@@ -27,6 +27,14 @@ if(ENABLE_JAX)
target_link_libraries(${libname} PRIVATE TensorFlow::tensorflow_c)
target_compile_definitions(${libname} PRIVATE BUILD_JAX)
endif()
if(ENABLE_PADDLE AND NOT BUILD_PY_IF)
target_link_libraries(${libname} PUBLIC "${PADDLE_LIBRARIES}")
target_compile_definitions(${libname} PUBLIC BUILD_PADDLE)
if(DP_VARIANT STREQUAL "rocm")
target_link_libraries(${libname}
PUBLIC "${hip_LIB_INSTALL_DIR}/libgalaxyhip.so")
endif()
endif()

target_include_directories(
${libname}