diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile new file mode 100644 index 0000000000..18a2acda7f --- /dev/null +++ b/.devcontainer/Dockerfile @@ -0,0 +1,3 @@ +FROM mcr.microsoft.com/devcontainers/cpp:1-ubuntu-24.04 + +COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/ diff --git a/.devcontainer/README.md b/.devcontainer/README.md new file mode 100644 index 0000000000..57f13065b6 --- /dev/null +++ b/.devcontainer/README.md @@ -0,0 +1,40 @@ +# DeePMD-kit devcontainer environment + +This [devcontainer](https://vscode.js.cn/docs/devcontainers/devcontainer-cli) environment sets up the Python and C++ environments for developing DeePMD-kit. +One can set it up locally or use [GitHub Codespaces](https://docs.github.com/en/codespaces) by clicking the Code button on the DeePMD-kit repository page. +The whole setup process requires about 10 minutes, so one needs to be patient. + +## Python environment + +The following packages are installed into the Python environment `.venv`: + +- DeePMD-kit (in editable mode) +- Backends including TensorFlow, PyTorch, JAX +- LAMMPS +- MPICH +- CMake +- pre-commit (including hooks) +- Test packages including pytest +- Doc packages including sphinx + +## C++ interface + +The C++ interface with TensorFlow and PyTorch support is installed into the `dp` directory. + +When calling and debugging LAMMPS with DeePMD-kit, use the following scripts instead of the regular `lmp`: + +- `.devcontainer/lmp` +- `.devcontainer/gdb_lmp` + +Use the following scripts for `pytest` with LAMMPS: + +- `.devcontainer/pytest_lmp` +- `.devcontainer/gdb_pytest_lmp` + +## Rebuild + +Usually the Python package does not need to be reinstalled. +But when one wants to recompile the C++ code, the following scripts can be executed. 
+ +- `.devcontainer/build_cxx.sh` +- `.devcontainer/build_py.sh` diff --git a/.devcontainer/build_cxx.sh b/.devcontainer/build_cxx.sh new file mode 100755 index 0000000000..442539301e --- /dev/null +++ b/.devcontainer/build_cxx.sh @@ -0,0 +1,21 @@ +#!/bin/bash +set -ev + +NPROC=$(nproc --all) +SCRIPT_PATH=$(dirname $(realpath -s $0)) + +export CMAKE_PREFIX_PATH=${SCRIPT_PATH}/../libtorch +TENSORFLOW_ROOT=$(python -c 'import importlib,pathlib;print(pathlib.Path(importlib.util.find_spec("tensorflow").origin).parent)') + +mkdir -p ${SCRIPT_PATH}/../buildcxx/ +cd ${SCRIPT_PATH}/../buildcxx/ +cmake -D ENABLE_TENSORFLOW=ON \ + -D ENABLE_PYTORCH=ON \ + -D CMAKE_INSTALL_PREFIX=${SCRIPT_PATH}/../dp/ \ + -D LAMMPS_VERSION=stable_29Aug2024_update1 \ + -D CMAKE_BUILD_TYPE=Debug \ + -D BUILD_TESTING:BOOL=TRUE \ + -D TENSORFLOW_ROOT=${TENSORFLOW_ROOT} \ + ${SCRIPT_PATH}/../source +cmake --build . -j${NPROC} +cmake --install . diff --git a/.devcontainer/build_py.sh b/.devcontainer/build_py.sh new file mode 100755 index 0000000000..8e9a006a4f --- /dev/null +++ b/.devcontainer/build_py.sh @@ -0,0 +1,8 @@ +#!/bin/bash +set -ev + +SCRIPT_PATH=$(dirname $(realpath -s $0)) +cd ${SCRIPT_PATH}/.. 
+ +uv sync --dev --python 3.12 --extra cpu --extra torch --extra jax --extra lmp --extra test --extra docs +pre-commit install diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 0000000000..27c40bbe6a --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,17 @@ +{ + "name": "DeePMD-kit", + "build": { + "dockerfile": "Dockerfile" + }, + "features": { + "ghcr.io/devcontainers/features/github-cli:1": {} + }, + "postCreateCommand": ".devcontainer/build_py.sh && .devcontainer/download_libtorch.sh && .devcontainer/build_cxx.sh && pre-commit install-hooks", + "remoteEnv": { + "PATH": "${containerEnv:PATH}:${containerWorkspaceFolder}/.venv/bin", + "DP_ENABLE_PYTORCH": "1", + "DP_VARIANT": "cpu", + "LMP_CXX11_ABI_0": "1", + "UV_EXTRA_INDEX_URL": "https://download.pytorch.org/whl/cpu" + } +} diff --git a/.devcontainer/download_libtorch.sh b/.devcontainer/download_libtorch.sh new file mode 100755 index 0000000000..d78b559997 --- /dev/null +++ b/.devcontainer/download_libtorch.sh @@ -0,0 +1,8 @@ +#!/bin/bash +set -ev + +SCRIPT_PATH=$(dirname $(realpath -s $0)) +cd ${SCRIPT_PATH}/.. 
+ +wget https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-2.5.0%2Bcpu.zip -O ~/libtorch.zip +unzip ~/libtorch.zip diff --git a/.devcontainer/gdb_lmp b/.devcontainer/gdb_lmp new file mode 100755 index 0000000000..33e883780b --- /dev/null +++ b/.devcontainer/gdb_lmp @@ -0,0 +1,9 @@ +#!/bin/bash +SCRIPT_PATH=$(dirname $(realpath -s $0)) + +export CMAKE_PREFIX_PATH=${SCRIPT_PATH}/../libtorch +TENSORFLOW_ROOT=$(python -c 'import importlib,pathlib;print(pathlib.Path(importlib.util.find_spec("tensorflow").origin).parent)') + +env LAMMPS_PLUGIN_PATH=${SCRIPT_PATH}/../dp/lib/deepmd_lmp \ + LD_LIBRARY_PATH=${SCRIPT_PATH}/../dp/lib:${CMAKE_PREFIX_PATH}/lib:${TENSORFLOW_ROOT} \ + gdb ${SCRIPT_PATH}/../.venv/lib/python3.12/site-packages/lammps/lmp "$@" diff --git a/.devcontainer/gdb_pytest_lmp b/.devcontainer/gdb_pytest_lmp new file mode 100755 index 0000000000..e27e40d4b0 --- /dev/null +++ b/.devcontainer/gdb_pytest_lmp @@ -0,0 +1,9 @@ +#!/bin/bash +SCRIPT_PATH=$(dirname $(realpath -s $0))/../.. 
+ +export CMAKE_PREFIX_PATH=${SCRIPT_PATH}/../libtorch +TENSORFLOW_ROOT=$(python -c 'import importlib,pathlib;print(pathlib.Path(importlib.util.find_spec("tensorflow").origin).parent)') + +env LAMMPS_PLUGIN_PATH=${SCRIPT_PATH}/../dp/lib/deepmd_lmp \ + LD_LIBRARY_PATH=${SCRIPT_PATH}/../dp/lib:${CMAKE_PREFIX_PATH}/lib:${TENSORFLOW_ROOT} \ + gdb --args python -m pytest -s "$@" diff --git a/.devcontainer/lmp b/.devcontainer/lmp new file mode 100755 index 0000000000..c8e781aa57 --- /dev/null +++ b/.devcontainer/lmp @@ -0,0 +1,9 @@ +#!/bin/bash +SCRIPT_PATH=$(dirname $(realpath -s $0)) + +export CMAKE_PREFIX_PATH=${SCRIPT_PATH}/../libtorch +TENSORFLOW_ROOT=$(python -c 'import importlib,pathlib;print(pathlib.Path(importlib.util.find_spec("tensorflow").origin).parent)') + +env LAMMPS_PLUGIN_PATH=${SCRIPT_PATH}/../dp/lib/deepmd_lmp \ + LD_LIBRARY_PATH=${SCRIPT_PATH}/../dp/lib:${CMAKE_PREFIX_PATH}/lib:${TENSORFLOW_ROOT} \ + ${SCRIPT_PATH}/../.venv/bin/lmp "$@" diff --git a/.devcontainer/pytest_lmp b/.devcontainer/pytest_lmp new file mode 100755 index 0000000000..9371ba72d5 --- /dev/null +++ b/.devcontainer/pytest_lmp @@ -0,0 +1,9 @@ +#!/bin/bash +SCRIPT_PATH=$(dirname $(realpath -s $0))/../.. 
+ +export CMAKE_PREFIX_PATH=${SCRIPT_PATH}/../libtorch +TENSORFLOW_ROOT=$(python -c 'import importlib,pathlib;print(pathlib.Path(importlib.util.find_spec("tensorflow").origin).parent)') + +env LAMMPS_PLUGIN_PATH=${SCRIPT_PATH}/../dp/lib/deepmd_lmp \ + LD_LIBRARY_PATH=${SCRIPT_PATH}/../dp/lib:${CMAKE_PREFIX_PATH}/lib:${TENSORFLOW_ROOT} \ + python -m pytest "$@" diff --git a/.git_archival.txt b/.git_archival.txt new file mode 100644 index 0000000000..7c5100942a --- /dev/null +++ b/.git_archival.txt @@ -0,0 +1,3 @@ +node: $Format:%H$ +node-date: $Format:%cI$ +describe-name: $Format:%(describe:tags=true,match=*[0-9]*)$ diff --git a/.gitattributes b/.gitattributes index e77d446ba6..776405a339 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,2 +1,4 @@ # do not show up detailed difference on GitHub source/3rdparty/* linguist-generated=true +source/3rdparty/README.md linguist-generated=false +.git_archival.txt export-subst diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml index f13b187dfb..49918e47ac 100644 --- a/.github/ISSUE_TEMPLATE/bug-report.yml +++ b/.github/ISSUE_TEMPLATE/bug-report.yml @@ -21,10 +21,10 @@ body: validations: required: true - type: input - id: tf-version + id: backend-version attributes: - label: TensorFlow Version - description: "The version will be printed when running DeePMD-kit." + label: Backend and its version + description: "The backend and its version will be printed when running DeePMD-kit, e.g. TensorFlow v2.15.0." validations: required: true - type: dropdown diff --git a/.github/ISSUE_TEMPLATE/generic-issue.yml b/.github/ISSUE_TEMPLATE/generic-issue.yml index af9f01c64d..f84097580e 100644 --- a/.github/ISSUE_TEMPLATE/generic-issue.yml +++ b/.github/ISSUE_TEMPLATE/generic-issue.yml @@ -21,10 +21,10 @@ body: validations: required: true - type: input - id: tf-version + id: backend-version attributes: - label: TensorFlow Version - description: "The version will be printed when running DeePMD-kit." 
+ label: Backend and its version + description: "The backend and its version will be printed when running DeePMD-kit, e.g. TensorFlow v2.15.0." validations: required: true - type: textarea diff --git a/.github/labeler.yml b/.github/labeler.yml index b0a85679de..0183a144ba 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -1,39 +1,38 @@ -Python: -- changed-files: - - any-glob-to-any-file: - - deepmd/**/* - - deepmd_utils/**/* - - source/tests/**/* -Docs: -- changed-files: - - any-glob-to-any-file: doc/**/* -Examples: -- changed-files: - - any-glob-to-any-file: examples/**/* -Core: -- changed-files: - - any-glob-to-any-file: source/lib/**/* -CUDA: -- changed-files: - - any-glob-to-any-file: source/lib/src/gpu/**/* -ROCM: -- changed-files: - - any-glob-to-any-file: source/lib/src/gpu/**/* -OP: -- changed-files: - - any-glob-to-any-file: source/op/**/* -C++: -- changed-files: - - any-glob-to-any-file: source/api_cc/**/* -C: -- changed-files: - - any-glob-to-any-file: source/api_c/**/* -LAMMPS: -- changed-files: - - any-glob-to-any-file: source/lmp/**/* -Gromacs: -- changed-files: - - any-glob-to-any-file: source/gmx/**/* -i-Pi: -- changed-files: - - any-glob-to-any-file: source/ipi/**/* +Python: + - changed-files: + - any-glob-to-any-file: + - deepmd/**/* + - source/tests/**/* +Docs: + - changed-files: + - any-glob-to-any-file: doc/**/* +Examples: + - changed-files: + - any-glob-to-any-file: examples/**/* +Core: + - changed-files: + - any-glob-to-any-file: source/lib/**/* +CUDA: + - changed-files: + - any-glob-to-any-file: source/lib/src/gpu/**/* +ROCM: + - changed-files: + - any-glob-to-any-file: source/lib/src/gpu/**/* +OP: + - changed-files: + - any-glob-to-any-file: source/op/**/* +C++: + - changed-files: + - any-glob-to-any-file: source/api_cc/**/* +C: + - changed-files: + - any-glob-to-any-file: source/api_c/**/* +LAMMPS: + - changed-files: + - any-glob-to-any-file: source/lmp/**/* +Gromacs: + - changed-files: + - any-glob-to-any-file: source/gmx/**/* 
+i-PI: + - changed-files: + - any-glob-to-any-file: source/ipi/**/* diff --git a/.github/workflows/build_cc.yml b/.github/workflows/build_cc.yml index f029517d80..a1ac032891 100644 --- a/.github/workflows/build_cc.yml +++ b/.github/workflows/build_cc.yml @@ -1,11 +1,17 @@ on: push: + branches-ignore: + - "gh-readonly-queue/**" pull_request: + merge_group: +concurrency: + group: ${{ github.workflow }}-${{ github.ref || github.run_id }} + cancel-in-progress: true name: Build C++ jobs: buildcc: name: Build C++ - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 strategy: matrix: include: @@ -24,9 +30,13 @@ jobs: - uses: actions/setup-python@v5 with: python-version: '3.11' - cache: 'pip' - uses: lukka/get-cmake@latest - - run: python -m pip install tensorflow + - run: python -m pip install uv + - run: source/install/uv_with_retry.sh pip install --system tensorflow + - name: Download libtorch + run: | + wget https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-2.1.2%2Bcpu.zip -O libtorch.zip + unzip libtorch.zip - run: | wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.0-1_all.deb \ && sudo dpkg -i cuda-keyring_1.0-1_all.deb \ @@ -48,13 +58,17 @@ jobs: && sudo apt-get update \ && sudo apt-get install -y rocm-dev hipcub-dev if: matrix.variant == 'rocm' - - run: source/install/build_cc.sh + - run: | + export CMAKE_PREFIX_PATH=$GITHUB_WORKSPACE/libtorch + source/install/build_cc.sh env: DP_VARIANT: ${{ matrix.dp_variant }} DOWNLOAD_TENSORFLOW: "FALSE" CMAKE_GENERATOR: Ninja if: matrix.variant != 'clang' - - run: source/install/build_cc.sh + - run: | + export CMAKE_PREFIX_PATH=$GITHUB_WORKSPACE/libtorch + source/install/build_cc.sh env: DP_VARIANT: cpu DOWNLOAD_TENSORFLOW: "FALSE" diff --git a/.github/workflows/build_wheel.yml b/.github/workflows/build_wheel.yml index 23076e9bf5..db5745e241 100644 --- a/.github/workflows/build_wheel.yml +++ b/.github/workflows/build_wheel.yml @@ -2,7 +2,16 @@ name: Build 
and upload to PyPI on: push: + branches-ignore: + - "gh-readonly-queue/**" + tags: + - "v*" pull_request: + merge_group: + +concurrency: + group: ${{ github.workflow }}-${{ github.ref || github.run_id }} + cancel-in-progress: true jobs: determine-arm64-runner: @@ -41,12 +50,12 @@ jobs: cuda_version: 11.8 dp_pkg_name: deepmd-kit-cu11 # macos-x86-64 - - os: macos-latest + - os: macos-13 python: 311 platform_id: macosx_x86_64 dp_variant: cpu # macos-arm64 - - os: macos-latest + - os: macos-14 python: 311 platform_id: macosx_arm64 dp_variant: cpu @@ -65,11 +74,19 @@ jobs: with: # https://github.com/pypa/setuptools_scm/issues/480 fetch-depth: 0 + - name: Install uv + run: curl --proto '=https' --tlsv1.2 -LsSf https://github.com/astral-sh/uv/releases/download/0.2.24/uv-installer.sh | sh + if: runner.os != 'Linux' - uses: docker/setup-qemu-action@v3 name: Setup QEMU if: matrix.platform_id == 'manylinux_aarch64' && matrix.os == 'ubuntu-latest' + # detect version in advance. See #3168 + - run: | + echo "SETUPTOOLS_SCM_PRETEND_VERSION=$(pipx run uv tool run --from setuptools_scm python -m setuptools_scm)" >> $GITHUB_ENV + rm -rf .git + if: matrix.dp_pkg_name == 'deepmd-kit-cu11' - name: Build wheels - uses: pypa/cibuildwheel@v2.16 + uses: pypa/cibuildwheel@v2.21 env: CIBW_BUILD_VERBOSITY: 1 CIBW_ARCHS: all @@ -77,6 +94,7 @@ jobs: DP_VARIANT: ${{ matrix.dp_variant }} CUDA_VERSION: ${{ matrix.cuda_version }} DP_PKG_NAME: ${{ matrix.dp_pkg_name }} + CIBW_BUILD_FRONTEND: 'build[uv]' - uses: actions/upload-artifact@v4 with: name: cibw-cp${{ matrix.python }}-${{ matrix.platform_id }}-cu${{ matrix.cuda_version }}-${{ strategy.job-index }} @@ -88,14 +106,8 @@ jobs: - uses: actions/checkout@v4 with: fetch-depth: 0 - - uses: actions/setup-python@v5 - name: Install Python - with: - python-version: '3.11' - cache: 'pip' - - run: python -m pip install build - name: Build sdist - run: python -m build --sdist + run: pipx run uv tool run --with build[uv] --from build python -m build 
--installer uv --sdist - uses: actions/upload-artifact@v4 with: @@ -130,13 +142,16 @@ jobs: - variant: "_cu11" cuda_version: "11" steps: + - name: Delete huge unnecessary tools folder + run: rm -rf /opt/hostedtoolcache - uses: actions/checkout@v4 - uses: actions/download-artifact@v4 with: path: source/install/docker/dist + pattern: cibw-*-manylinux_x86_64-cu${{ matrix.cuda_version }}* merge-multiple: true - name: Log in to the Container registry - uses: docker/login-action@343f7c4344506bcbf9b4de18042ae17996df046d + uses: docker/login-action@v3 with: registry: ghcr.io username: ${{ github.actor }} @@ -144,12 +159,12 @@ jobs: - name: Extract metadata (tags, labels) for Docker id: meta - uses: docker/metadata-action@dbef88086f6cef02e264edb7dbf63250c17cef6c + uses: docker/metadata-action@v5 with: images: ghcr.io/deepmodeling/deepmd-kit - name: Build and push Docker image - uses: docker/build-push-action@4a13e500e55cf31b7a5d59a38ab2040ab0f42f56 + uses: docker/build-push-action@v6 with: context: source/install/docker push: ${{ github.repository_owner == 'deepmodeling' && github.event_name == 'push' && github.actor != 'dependabot[bot]' }} @@ -166,6 +181,7 @@ jobs: - uses: actions/download-artifact@v4 with: path: dist/packages + pattern: cibw-* merge-multiple: true - uses: actions/setup-python@v5 name: Install Python diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index a9a162432c..583e7785d9 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -2,14 +2,18 @@ name: "CodeQL" on: push: + branches-ignore: + - "gh-readonly-queue/**" pull_request: schedule: - cron: '45 2 * * 2' - +concurrency: + group: ${{ github.workflow }}-${{ github.ref || github.run_id }} + cancel-in-progress: true jobs: analyze: name: Analyze - runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }} + runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-22.04' }} timeout-minutes: ${{ (matrix.language == 'swift' 
&& 120) || 360 }} permissions: actions: read @@ -37,6 +41,8 @@ jobs: && sudo apt-get update \ && sudo apt-get -y install cuda-cudart-dev-12-2 cuda-nvcc-12-2 python -m pip install tensorflow + wget https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-2.1.2%2Bcpu.zip -O libtorch.zip + unzip libtorch.zip env: DEBIAN_FRONTEND: noninteractive # Initializes the CodeQL tools for scanning. @@ -46,7 +52,9 @@ jobs: languages: ${{ matrix.language }} queries: security-extended,security-and-quality - name: "Run, Build Application using script" - run: source/install/build_cc.sh + run: | + export CMAKE_PREFIX_PATH=$GITHUB_WORKSPACE/libtorch + source/install/build_cc.sh env: DP_VARIANT: cuda DOWNLOAD_TENSORFLOW: "FALSE" diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml index 877c780f1f..be43c5cff2 100644 --- a/.github/workflows/labeler.yml +++ b/.github/workflows/labeler.yml @@ -11,4 +11,4 @@ jobs: steps: - uses: actions/labeler@v5 with: - repo-token: "${{ secrets.GITHUB_TOKEN }}" + repo-token: "${{ secrets.GITHUB_TOKEN }}" diff --git a/.github/workflows/package_c.yml b/.github/workflows/package_c.yml index 5594c79181..f5e4a97d56 100644 --- a/.github/workflows/package_c.yml +++ b/.github/workflows/package_c.yml @@ -2,8 +2,15 @@ name: Build C library on: push: + branches-ignore: + - "gh-readonly-queue/**" + tags: + - "v*" pull_request: - + merge_group: +concurrency: + group: ${{ github.workflow }}-${{ github.ref || github.run_id }} + cancel-in-progress: true jobs: build_c: name: Build C library @@ -11,7 +18,7 @@ jobs: strategy: matrix: include: - - tensorflow_build_version: "2.15" + - tensorflow_build_version: "2.18" tensorflow_version: "" filename: libdeepmd_c.tar.gz - tensorflow_build_version: "2.14" @@ -19,6 +26,8 @@ jobs: filename: libdeepmd_c_cu11.tar.gz steps: - uses: actions/checkout@v4 + with: + fetch-depth: 0 - name: Package C library run: ./source/install/docker_package_c.sh env: @@ -35,7 +44,7 @@ jobs: - name: Test C 
library run: ./source/install/docker_test_package_c.sh - name: Release - uses: softprops/action-gh-release@v1 + uses: softprops/action-gh-release@v2 if: startsWith(github.ref, 'refs/tags/') with: files: ${{ matrix.filename }} diff --git a/.github/workflows/test_cc.yml b/.github/workflows/test_cc.yml index ef6fade8e5..f7f3a4f431 100644 --- a/.github/workflows/test_cc.yml +++ b/.github/workflows/test_cc.yml @@ -1,11 +1,20 @@ on: push: + branches-ignore: + - "gh-readonly-queue/**" pull_request: + merge_group: +concurrency: + group: ${{ github.workflow }}-${{ github.ref || github.run_id }} + cancel-in-progress: true name: Test C++ jobs: testcc: name: Test C++ - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 + strategy: + matrix: + check_memleak: [true, false] steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 @@ -17,29 +26,41 @@ jobs: with: mpi: mpich - uses: lukka/get-cmake@latest - - run: python -m pip install tensorflow - - run: source/install/test_cc_local.sh + - run: python -m pip install uv + - name: Install Python dependencies + run: | + source/install/uv_with_retry.sh pip install --system tensorflow-cpu + export TENSORFLOW_ROOT=$(python -c 'import importlib,pathlib;print(pathlib.Path(importlib.util.find_spec("tensorflow").origin).parent)') + source/install/uv_with_retry.sh pip install --system -e .[cpu,test,lmp,jax] mpi4py + - name: Convert models + run: source/tests/infer/convert-models.sh + - name: Download libtorch + run: | + wget https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-2.1.2%2Bcpu.zip -O libtorch.zip + unzip libtorch.zip + # https://github.com/actions/runner-images/issues/9491 + - name: Fix kernel mmap rnd bits + run: sudo sysctl vm.mmap_rnd_bits=28 + if: ${{ matrix.check_memleak }} + - run: | + export CMAKE_PREFIX_PATH=$GITHUB_WORKSPACE/libtorch + source/install/test_cc_local.sh env: OMP_NUM_THREADS: 1 TF_INTRA_OP_PARALLELISM_THREADS: 1 TF_INTER_OP_PARALLELISM_THREADS: 1 LMP_CXX11_ABI_0: 1 
CMAKE_GENERATOR: Ninja + CXXFLAGS: ${{ matrix.check_memleak && '-fsanitize=leak' || '' }} # test lammps - # ASE issue: https://gitlab.com/ase/ase/-/merge_requests/2843 - # TODO: remove ase version when ase has new release - - run: | - python -m pip install -U pip - python -m pip install -e .[cpu,test,lmp] "ase @ https://gitlab.com/ase/ase/-/archive/8c5aa5fd6448c5cfb517a014dccf2b214a9dfa8f/ase-8c5aa5fd6448c5cfb517a014dccf2b214a9dfa8f.tar.gz" - env: - DP_BUILD_TESTING: 1 - run: pytest --cov=deepmd source/lmp/tests env: OMP_NUM_THREADS: 1 TF_INTRA_OP_PARALLELISM_THREADS: 1 TF_INTER_OP_PARALLELISM_THREADS: 1 LAMMPS_PLUGIN_PATH: ${{ github.workspace }}/dp_test/lib/deepmd_lmp - LD_LIBRARY_PATH: ${{ github.workspace }}/dp_test/lib + LD_LIBRARY_PATH: ${{ github.workspace }}/dp_test/lib:${{ github.workspace }}/libtorch/lib + if: ${{ !matrix.check_memleak }} # test ipi - run: pytest --cov=deepmd source/ipi/tests env: @@ -47,10 +68,11 @@ jobs: TF_INTRA_OP_PARALLELISM_THREADS: 1 TF_INTER_OP_PARALLELISM_THREADS: 1 PATH: ${{ github.workspace }}/dp_test/bin:$PATH - LD_LIBRARY_PATH: ${{ github.workspace }}/dp_test/lib - - uses: codecov/codecov-action@v3 - with: - gcov: true + LD_LIBRARY_PATH: ${{ github.workspace }}/dp_test/lib:${{ github.workspace }}/libtorch/lib + if: ${{ !matrix.check_memleak }} + - uses: codecov/codecov-action@v5 + env: + CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} pass: name: Pass testing C++ needs: [testcc] diff --git a/.github/workflows/test_cuda.yml b/.github/workflows/test_cuda.yml index e74c0abde2..4dbdc5acb9 100644 --- a/.github/workflows/test_cuda.yml +++ b/.github/workflows/test_cuda.yml @@ -4,6 +4,14 @@ on: pull_request: types: - "labeled" + # to let the PR pass the test + - "opened" + - "reopened" + - "synchronize" + merge_group: +concurrency: + group: ${{ github.workflow }}-${{ github.ref || github.run_id }} + cancel-in-progress: true name: Test CUDA jobs: test_cuda: @@ -11,9 +19,9 @@ jobs: runs-on: nvidia # 
https://github.com/deepmodeling/deepmd-kit/pull/2884#issuecomment-1744216845 container: - image: nvidia/cuda:12.2.0-devel-ubuntu22.04 + image: nvidia/cuda:12.6.2-cudnn-devel-ubuntu22.04 options: --gpus all - if: github.repository_owner == 'deepmodeling' && github.event.label.name == 'Test CUDA' || github.event_name == 'workflow_dispatch' + if: github.repository_owner == 'deepmodeling' && (github.event_name == 'pull_request' && github.event.label && github.event.label.name == 'Test CUDA' || github.event_name == 'workflow_dispatch' || github.event_name == 'merge_group') steps: - name: Make sudo and git work run: apt-get update && apt-get install -y sudo git @@ -26,25 +34,45 @@ jobs: uses: mpi4py/setup-mpi@v1 with: mpi: mpich + - name: Install wget and unzip + run: apt-get update && apt-get install -y wget unzip - uses: lukka/get-cmake@latest + with: + useLocalCache: true + useCloudCache: false - run: | wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.0-1_all.deb \ && sudo dpkg -i cuda-keyring_1.0-1_all.deb \ && sudo apt-get update \ - && sudo apt-get -y install cuda-12-2 libcudnn8=8.9.5.*-1+cuda12.2 + && sudo apt-get -y install cuda-12-3 libcudnn8=8.9.5.*-1+cuda12.3 if: false # skip as we use nvidia image - - name: Set PyPI mirror for Aliyun cloud machine - run: python -m pip config --user set global.index-url https://mirrors.aliyun.com/pypi/simple/ - - run: python -m pip install -U "pip>=21.3.1,!=23.0.0" - - run: python -m pip install "tensorflow>=2.15.0rc0" - - run: python -m pip install -v -e .[gpu,test,lmp,cu12] "ase @ https://gitlab.com/ase/ase/-/archive/8c5aa5fd6448c5cfb517a014dccf2b214a9dfa8f/ase-8c5aa5fd6448c5cfb517a014dccf2b214a9dfa8f.tar.gz" + - run: python -m pip install -U uv + - run: source/install/uv_with_retry.sh pip install --system "tensorflow~=2.18.0rc2" "torch~=2.5.0" "jax[cuda12]" + - run: | + export PYTORCH_ROOT=$(python -c 'import torch;print(torch.__path__[0])') + export TENSORFLOW_ROOT=$(python -c 
'import importlib,pathlib;print(pathlib.Path(importlib.util.find_spec("tensorflow").origin).parent)') + source/install/uv_with_retry.sh pip install --system -v -e .[gpu,test,lmp,cu12,torch,jax] mpi4py env: - DP_BUILD_TESTING: 1 DP_VARIANT: cuda - CUDA_PATH: /usr/local/cuda-12.2 + DP_ENABLE_NATIVE_OPTIMIZATION: 1 + DP_ENABLE_PYTORCH: 1 - run: dp --version - - run: python -m pytest -s --cov=deepmd --cov=deepmd_utils source/tests --durations=0 - - run: source/install/test_cc_local.sh + - run: python -m pytest source/tests --durations=0 + env: + NUM_WORKERS: 0 + CUDA_VISIBLE_DEVICES: 0 + # See https://jax.readthedocs.io/en/latest/gpu_memory_allocation.html + XLA_PYTHON_CLIENT_PREALLOCATE: false + - name: Convert models + run: source/tests/infer/convert-models.sh + - name: Download libtorch + run: | + wget https://download.pytorch.org/libtorch/cu124/libtorch-cxx11-abi-shared-with-deps-2.5.0%2Bcu124.zip -O libtorch.zip + unzip libtorch.zip + - run: | + export CMAKE_PREFIX_PATH=$GITHUB_WORKSPACE/libtorch + export LD_LIBRARY_PATH=$CUDA_PATH/lib64:/usr/lib/x86_64-linux-gnu/:$LD_LIBRARY_PATH + source/install/test_cc_local.sh env: OMP_NUM_THREADS: 1 TF_INTRA_OP_PARALLELISM_THREADS: 1 @@ -53,18 +81,25 @@ jobs: CMAKE_GENERATOR: Ninja DP_VARIANT: cuda DP_USE_MPICH2: 1 - CUDA_PATH: /usr/local/cuda-12.2 - run: | - export LD_LIBRARY_PATH=$GITHUB_WORKSPACE/dp_test/lib:$CUDA_PATH/lib64:$LD_LIBRARY_PATH + export LD_LIBRARY_PATH=$CUDA_PATH/lib64:/usr/lib/x86_64-linux-gnu/:$GITHUB_WORKSPACE/dp_test/lib:$GITHUB_WORKSPACE/libtorch/lib:$LD_LIBRARY_PATH export PATH=$GITHUB_WORKSPACE/dp_test/bin:$PATH - python -m pytest -s --cov=deepmd source/lmp/tests - python -m pytest -s --cov=deepmd source/ipi/tests + python -m pytest -s source/lmp/tests || (cat log.lammps && exit 1) + python -m pytest source/ipi/tests env: OMP_NUM_THREADS: 1 TF_INTRA_OP_PARALLELISM_THREADS: 1 TF_INTER_OP_PARALLELISM_THREADS: 1 LAMMPS_PLUGIN_PATH: ${{ github.workspace }}/dp_test/lib/deepmd_lmp - CUDA_PATH: 
/usr/local/cuda-12.2 - - uses: codecov/codecov-action@v3 + CUDA_VISIBLE_DEVICES: 0 + pass: + name: Pass testing on CUDA + needs: [test_cuda] + runs-on: ubuntu-latest + if: always() + steps: + - name: Decide whether the needed jobs succeeded or failed + uses: re-actors/alls-green@release/v1 with: - gcov: true + jobs: ${{ toJSON(needs) }} + allowed-skips: test_cuda diff --git a/.github/workflows/test_python.yml b/.github/workflows/test_python.yml index 1bd78bfae0..e30a19c8b1 100644 --- a/.github/workflows/test_python.yml +++ b/.github/workflows/test_python.yml @@ -1,50 +1,107 @@ on: push: + branches-ignore: + - "gh-readonly-queue/**" pull_request: + merge_group: +concurrency: + group: ${{ github.workflow }}-${{ github.ref || github.run_id }} + cancel-in-progress: true name: Test Python jobs: testpython: name: Test Python runs-on: ubuntu-22.04 strategy: + fail-fast: false matrix: - include: - - python: 3.7 - tf: 1.14 - - python: 3.8 - tf: - - python: "3.11" - tf: + group: [1, 2, 3, 4, 5, 6] + python: ["3.9", "3.12"] steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: python-version: ${{ matrix.python }} - cache: 'pip' - - uses: mpi4py/setup-mpi@v1 - if: ${{ matrix.tf == '' }} - with: - mpi: openmpi - # https://github.com/pypa/pip/issues/11770 - - run: python -m pip install -U "pip>=21.3.1,!=23.0.0" - - run: pip install -e .[cpu,test] + - run: python -m pip install -U uv + - run: | + source/install/uv_with_retry.sh pip install --system openmpi tensorflow-cpu + source/install/uv_with_retry.sh pip install --system torch -i https://download.pytorch.org/whl/cpu + export TENSORFLOW_ROOT=$(python -c 'import tensorflow;print(tensorflow.__path__[0])') + export PYTORCH_ROOT=$(python -c 'import torch;print(torch.__path__[0])') + source/install/uv_with_retry.sh pip install --system -e .[test,jax] mpi4py + source/install/uv_with_retry.sh pip install --system horovod --no-build-isolation env: - TENSORFLOW_VERSION: ${{ matrix.tf }} + # Please note that uv has 
some issues with finding + # existing TensorFlow package. Currently, it uses + # TensorFlow in the build dependency, but if it + # changes, setting `TENSORFLOW_ROOT`. + DP_ENABLE_PYTORCH: 1 DP_BUILD_TESTING: 1 - - run: pip install horovod mpi4py - if: ${{ matrix.tf == '' }} - env: + UV_EXTRA_INDEX_URL: "https://pypi.anaconda.org/mpi4py/simple" HOROVOD_WITH_TENSORFLOW: 1 - HOROVOD_WITHOUT_GLOO: 1 + HOROVOD_WITHOUT_PYTORCH: 1 + HOROVOD_WITH_MPI: 1 - run: dp --version - - run: pytest --cov=deepmd --cov=deepmd_utils source/tests --durations=0 - - uses: codecov/codecov-action@v3 + - name: Get durations from cache + uses: actions/cache@v4 + with: + path: .test_durations + # the key must never match, even when restarting workflows, as that + # will cause durations to get out of sync between groups, the + # combined durations will be loaded if available + key: test2-durations-split-${{ github.run_id }}-${{ github.run_number}}-${{ matrix.python }}-${{ matrix.group }} + restore-keys: | + test2-durations-combined-${{ matrix.python }}-${{ github.sha }} + test2-durations-combined-${{ matrix.python }} + - run: pytest --cov=deepmd source/tests --durations=0 --splits 6 --group ${{ matrix.group }} --store-durations --clean-durations --durations-path=.test_durations --splitting-algorithm least_duration + env: + NUM_WORKERS: 0 + - name: Test TF2 eager mode + run: pytest --cov=deepmd --cov-append source/tests/consistent/io/test_io.py source/jax2tf_tests --durations=0 + env: + NUM_WORKERS: 0 + DP_TEST_TF2_ONLY: 1 + DP_DTYPE_PROMOTION_STRICT: 1 + if: matrix.group == 1 + - run: mv .test_durations .test_durations_${{ matrix.group }} + - name: Upload partial durations + uses: actions/upload-artifact@v4 + with: + name: split-${{ matrix.python }}-${{ matrix.group }} + path: .test_durations_${{ matrix.group }} + include-hidden-files: true + - uses: codecov/codecov-action@v5 + env: + CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} + update_durations: + name: Combine and update integration test 
durations + runs-on: ubuntu-22.04 + strategy: + fail-fast: false + matrix: + python: ["3.9", "3.12"] + needs: testpython + steps: + - name: Get durations from cache + uses: actions/cache@v4 + with: + path: .test_durations + # key won't match during the first run for the given commit, but + # restore-key will if there's a previous stored durations file, + # so cache will both be loaded and stored + key: test2-durations-combined-${{ matrix.python }}-${{ github.sha }} + restore-keys: test2-durations-combined-${{ matrix.python }} + - name: Download artifacts + uses: actions/download-artifact@v4 with: - gcov: true + pattern: split-${{ matrix.python }}-* + merge-multiple: true + - name: Combine test durations + run: jq -s add .test_durations_* > .test_durations pass: name: Pass testing Python - needs: [testpython] + needs: [testpython, update_durations] runs-on: ubuntu-latest if: always() steps: diff --git a/.github/workflows/todo.yml b/.github/workflows/todo.yml new file mode 100644 index 0000000000..2608bb1071 --- /dev/null +++ b/.github/workflows/todo.yml @@ -0,0 +1,20 @@ +name: TODO workflow +on: + push: + branches: + - devel +jobs: + build: + if: github.repository_owner == 'deepmodeling' + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Run tdg-github-action + uses: ribtoks/tdg-github-action@master + with: + TOKEN: ${{ secrets.GITHUB_TOKEN }} + REPO: ${{ github.repository }} + SHA: ${{ github.sha }} + REF: ${{ github.ref }} + EXCLUDE_PATTERN: "(source/3rdparty|.git)/.*" + COMMENT_ON_ISSUES: 1 diff --git a/.gitignore b/.gitignore index 82d3e4a7da..c574da757a 100644 --- a/.gitignore +++ b/.gitignore @@ -13,6 +13,7 @@ *.bz2 *.pyc *.pb +*.DS_Store tmp* CMakeCache.txt CMakeFiles @@ -43,3 +44,9 @@ build_cc_tests build_c_tests build_c/ libdeepmd_c/ +.uv/ +libtorch/ +uv.lock +buildcxx/ +node_modules/ +*.bib.original diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d4e89f1129..7aa2012200 100644 --- a/.pre-commit-config.yaml +++ 
b/.pre-commit-config.yaml @@ -1,119 +1,163 @@ # See https://pre-commit.com for more information # See https://pre-commit.com/hooks.html for more hooks repos: -- repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.5.0 + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v5.0.0 hooks: - - id: trailing-whitespace + - id: trailing-whitespace exclude: "^.+\\.pbtxt$" - - id: end-of-file-fixer + - id: end-of-file-fixer exclude: "^.+\\.pbtxt$" - - id: check-yaml - - id: check-json - - id: check-added-large-files - args: ['--maxkb=1024', '--enforce-all'] - # TODO: remove the following after resolved + - id: check-yaml + - id: check-json + - id: check-added-large-files + args: ["--maxkb=1024", "--enforce-all"] exclude: | - (?x)^( - source/tests/infer/dipolecharge_e.pbtxt| - source/tests/infer/deeppolar_new.pbtxt - )$ - - id: check-merge-conflict - - id: check-symlinks - - id: check-toml -# Python -- repo: https://github.com/PyCQA/isort + (?x)^( + source/tests/infer/dipolecharge_e.pbtxt| + source/tests/infer/deeppolar_new.pbtxt + )$ + - id: check-merge-conflict + - id: check-symlinks + - id: check-toml + # Python + - repo: https://github.com/PyCQA/isort rev: 5.13.2 hooks: - - id: isort - files: \.py$ - exclude: ^source/3rdparty -- repo: https://github.com/astral-sh/ruff-pre-commit + - id: isort + files: \.py$ + exclude: ^source/3rdparty + - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. 
- rev: v0.1.13 + rev: v0.7.4 + hooks: + - id: ruff + args: ["--fix"] + exclude: ^source/3rdparty + types_or: [python, pyi, jupyter] + - id: ruff-format + exclude: ^source/3rdparty + types_or: [python, pyi, jupyter] + - repo: https://github.com/pycqa/flake8 + # flake8 cannot autofix + rev: "7.1.1" hooks: - - id: ruff - args: ["--fix"] - exclude: ^source/3rdparty - types_or: [python, pyi, jupyter] - - id: ruff-format - exclude: ^source/3rdparty - types_or: [python, pyi, jupyter] -# numpydoc -- repo: https://github.com/Carreau/velin + - id: flake8 + additional_dependencies: + - torchfix==0.6.0 + - flake8-pyproject==1.2.3 + # numpydoc + - repo: https://github.com/Carreau/velin rev: 0.0.12 hooks: - - id: velin - args: ["--write"] - exclude: ^source/3rdparty -# Python inside docs -- repo: https://github.com/asottile/blacken-docs - rev: 1.16.0 + - id: velin + args: ["--write"] + exclude: ^source/3rdparty + # Python inside docs + - repo: https://github.com/asottile/blacken-docs + rev: 1.19.1 hooks: - - id: blacken-docs -# C++ -- repo: https://github.com/pre-commit/mirrors-clang-format - rev: v17.0.6 + - id: blacken-docs + # C++ + - repo: https://github.com/pre-commit/mirrors-clang-format + rev: v19.1.3 hooks: - - id: clang-format - exclude: ^source/3rdparty|source/lib/src/gpu/cudart/.+\.inc -# CSS -- repo: https://github.com/pre-commit/mirrors-csslint - rev: v1.0.5 + - id: clang-format + exclude: ^(source/3rdparty|source/lib/src/gpu/cudart/.+\.inc|.+\.ipynb$) + # markdown, yaml, CSS, javascript + - repo: https://github.com/pre-commit/mirrors-prettier + rev: v4.0.0-alpha.8 + hooks: + - id: prettier + types_or: [markdown, yaml, css] + # workflow files cannot be modified by pre-commit.ci + exclude: ^(source/3rdparty|\.github/workflows|\.clang-format) + # Shell + - repo: https://github.com/scop/pre-commit-shfmt + rev: v3.10.0-1 hooks: - - id: csslint -# Shell -- repo: https://github.com/scop/pre-commit-shfmt - rev: v3.7.0-4 - hooks: - - id: shfmt -# CMake -- repo: 
https://github.com/cheshirekow/cmake-format-precommit - rev: v0.6.13 - hooks: - - id: cmake-format - #- id: cmake-lint -# license header -- repo: https://github.com/Lucas-C/pre-commit-hooks - rev: v1.5.4 - hooks: - # C++, js - - id: insert-license + - id: shfmt + # CMake + - repo: https://github.com/cheshirekow/cmake-format-precommit + rev: v0.6.13 + hooks: + - id: cmake-format + #- id: cmake-lint + - repo: https://github.com/njzjz/mirrors-bibtex-tidy + rev: v1.13.0 + hooks: + - id: bibtex-tidy + args: + - --curly + - --numeric + - --align=13 + - --blank-lines + # disable sort: the order of keys and fields has explicit meanings + #- --sort=key + - --duplicates=key,doi,citation,abstract + - --merge=combine + #- --sort-fields + #- --strip-comments + - --trailing-commas + - --encode-urls + - --remove-empty-fields + - --wrap=80 + # license header + - repo: https://github.com/Lucas-C/pre-commit-hooks + rev: v1.5.5 + hooks: + # C++, js + - id: insert-license files: \.(c|cc|cpp|js|ts|h|hpp)$ args: - - --license-filepath - - .license-header.txt - - --comment-style - - // - - --no-extra-eol + - --license-filepath + - .license-header.txt + - --comment-style + - // + - --no-extra-eol exclude: ^source/3rdparty|source/lib/src/gpu/cudart/.+\.inc - # CSS - - id: insert-license + # CSS + - id: insert-license files: \.(css|scss)$ args: - - --license-filepath - - .license-header.txt - - --comment-style - - /*| *| */ - - --no-extra-eol - # Python - - id: insert-license + - --license-filepath + - .license-header.txt + - --comment-style + - /*| *| */ + - --no-extra-eol + # Python + - id: insert-license files: \.(py|pyx)$ args: - - --license-filepath - - .license-header.txt - - --comment-style - - "#" - - --no-extra-eol + - --license-filepath + - .license-header.txt + - --comment-style + - "#" + - --no-extra-eol exclude: ^source/3rdparty - # HTML - - id: insert-license + # HTML + - id: insert-license files: \.(html|vue|xml)$ args: - - --license-filepath - - .license-header.txt - -
--comment-style - - - - --no-extra-eol + - --license-filepath + - .license-header.txt + - --comment-style + - + - --no-extra-eol + - repo: local + hooks: + - id: disallow-caps + name: Disallow improper capitalization + language: pygrep + entry: DeepMD|DeepMd|Pytorch|Tensorflow|Numpy|Github|Lammps|I-Pi|I-PI|i-Pi + # unclear why PairDeepMD is used instead of PairDeePMD + exclude: .pre-commit-config.yaml|source/lmp + # customized pylint rules + - repo: https://github.com/pylint-dev/pylint/ + rev: v3.3.1 + hooks: + - id: pylint + entry: env PYTHONPATH=source/checker pylint + files: ^deepmd/ ci: autoupdate_branch: devel diff --git a/.readthedocs.yml b/.readthedocs.yml index 6f3ff6be3f..499411eaa3 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -2,7 +2,13 @@ version: 2 build: os: ubuntu-20.04 tools: - python: mambaforge-4.10 -conda: - environment: doc/environment.yml -formats: all + python: "3.11" + jobs: + post_create_environment: + - pip install uv + post_install: + - VIRTUAL_ENV=$READTHEDOCS_VIRTUALENV_PATH uv pip install -r doc/requirements.txt + apt_packages: + - inkscape +formats: + - pdf diff --git a/CITATIONS.bib b/CITATIONS.bib index ac682b28f7..d5524a14f6 100644 --- a/CITATIONS.bib +++ b/CITATIONS.bib @@ -1,272 +1,364 @@ -The proposed feature of each article is described in the "annote" field. -Please cite a article if any feature is used - -@article{Wang_ComputPhysCommun_2018_v228_p178, - annote = {general purpose}, - author = {Wang, Han and Zhang, Linfeng and Han, Jiequn and E, Weinan}, - doi = {10.1016/j.cpc.2018.03.016}, - year = 2018, - month = {jul}, - publisher = {Elsevier {BV}}, - volume = 228, - journal = {Comput. Phys. 
Comm.}, - title = {{DeePMD-kit: A deep learning package for many-body potential - energy representation and molecular dynamics}}, - pages = {178--184}, -} - -@Article{Zeng_JChemPhys_2023_v159_p054801, - annote = {general purpose}, - title = {{DeePMD-kit v2: A software package for deep potential models}}, - author = {Jinzhe Zeng and Duo Zhang and Denghui Lu and Pinghui Mo and Zeyu Li - and Yixiao Chen and Mari{\'a}n Rynik and Li'ang Huang and Ziyao Li and - Shaochen Shi and Yingze Wang and Haotian Ye and Ping Tuo and Jiabin - Yang and Ye Ding and Yifan Li and Davide Tisi and Qiyu Zeng and Han - Bao and Yu Xia and Jiameng Huang and Koki Muraoka and Yibo Wang and - Junhan Chang and Fengbo Yuan and Sigbj{\o}rn L{\o}land Bore and Chun - Cai and Yinnian Lin and Bo Wang and Jiayan Xu and Jia-Xin Zhu and - Chenxing Luo and Yuzhi Zhang and Rhys E A Goodall and Wenshuo Liang - and Anurag Kumar Singh and Sikai Yao and Jingchao Zhang and Renata - Wentzcovitch and Jiequn Han and Jie Liu and Weile Jia and Darrin M - York and Weinan E and Roberto Car and Linfeng Zhang and Han Wang}, - journal = {J. Chem. Phys.}, - volume = 159, - issue = 5, - year = 2023, - pages = 054801, - doi = {10.1063/5.0155600}, -} - - -@article{Lu_CompPhysCommun_2021_v259_p107624, - annote = {GPU support}, - title={{86 PFLOPS Deep Potential Molecular Dynamics simulation of 100 million - atoms with ab initio accuracy}}, - author={Lu, Denghui and Wang, Han and Chen, Mohan and Lin, Lin and Car, Roberto - and E, Weinan and Jia, Weile and Zhang, Linfeng}, - journal={Comput. Phys. Comm.}, - volume={259}, - pages={107624}, - year={2021}, - publisher={Elsevier}, - doi={10.1016/j.cpc.2020.107624}, -} - -@article{Zhang_PhysRevLett_2018_v120_p143001, - annote = {local frame (loc_frame)}, - author = {Linfeng Zhang and Jiequn Han and Han Wang and - Roberto Car and Weinan E}, - journal = {Phys. Rev. 
Lett.}, - number = {14}, - pages = {143001}, - publisher = {APS}, - title = {{Deep potential molecular dynamics: a scalable model - with the accuracy of quantum mechanics}}, - volume = {120}, - year = {2018}, - doi = {10.1103/PhysRevLett.120.143001} -} - -@incollection{Zhang_BookChap_NIPS_2018_v31_p4436, - annote = {DeepPot-SE (se_e2_a, se_e2_r, se_e3, se_atten)}, - title = {{End-to-end Symmetry Preserving Inter-atomic Potential Energy Model - for Finite and Extended Systems}}, - author = {Zhang, Linfeng and Han, Jiequn and Wang, Han and Saidi, Wissam and - Car, Roberto and E, Weinan}, - booktitle = {Advances in Neural Information Processing Systems 31}, - editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. - Cesa-Bianchi and R. Garnett}, - pages = {4436--4446}, - year = {2018}, - publisher = {Curran Associates, Inc.}, - url = {https://dl.acm.org/doi/10.5555/3327345.3327356} -} - -@Article{Wang_NuclFusion_2022_v62_p126013, - annote = {three-body embedding DeepPot-SE (se_e3)}, - author = {Xiaoyang Wang and Yinan Wang and Linfeng Zhang and Fuzhi Dai and Han - Wang}, - title = {{A tungsten deep neural-network potential for simulating mechanical - property degradation under fusion service environment}}, - journal = {Nucl. Fusion}, - year = 2022, - volume = 62, - issue = 12, - pages = 126013, - doi = {10.1088/1741-4326/ac888b}, -} - -@misc{Zhang_2022_DPA1, - annote = {attention-based descriptor}, - author = {Zhang, Duo and Bi, Hangrui and Dai, Fu-Zhi and Jiang, Wanrun and Zhang, Linfeng and Wang, Han}, - title = {{DPA-1: Pretraining of Attention-based Deep Potential Model for Molecular Simulation}}, - publisher = {arXiv}, - year = {2022}, - doi = {10.48550/arXiv.2208.08236}, -} - -@article{Zhang_PhysPlasmas_2020_v27_p122704, - annote = {frame-specific parameters (e.g. 
electronic temperature)}, - author = {Zhang, Yuzhi and Gao, Chang and Liu, Qianrui and Zhang, Linfeng and Wang, Han and Chen, Mohan}, - title = {{Warm dense matter simulation via electron temperature dependent deep potential molecular dynamics}}, - journal = {Phys. Plasmas}, - volume = {27}, - number = {12}, - pages = {122704}, - year = {2020}, - month = {12}, - doi = {10.1063/5.0023265}, -} - -@misc{Zeng_2023_TTMDPMD, - annote = {atom-specific parameter (e.g. electron temperature) }, - author = {Zeng, Qiyu and Chen, Bo and Zhang, Shen and Kang, Dongdong and Wang, Han and Yu, Xiaoxiang and Dai, Jiayu}, - title = {{Full-scale ab initio simulations of laser-driven atomistic dynamics}}, - publisher = {arXiv}, - year = {2023}, - doi = {10.48550/arXiv.2308.13863}, -} - -@article{Zhang_PhysRevB_2020_v102_p41121, - annote = {fit dipole}, - title={{Deep neural network for the dielectric response of insulators}}, - author={Zhang, Linfeng and Chen, Mohan and Wu, Xifan and Wang, Han and E, Weinan and Car, Roberto}, - journal={Phys. Rev. B}, - volume={102}, - number={4}, - pages={041121}, - year={2020}, - publisher={APS}, - doi={10.1103/PhysRevB.102.041121} -} - -@article{Sommers_PhysChemChemPhys_2020_v22_p10592, - annote = {fit polarizability}, - title={{Raman spectrum and polarizability of liquid water from deep neural networks}}, - author={Sommers, Grace M and Andrade, Marcos F Calegari and Zhang, Linfeng and Wang, Han and Car, Roberto}, - journal={Phys. Chem. Chem. Phys.}, - volume={22}, - number={19}, - pages={10592--10602}, - year={2020}, - publisher={Royal Society of Chemistry}, - doi={10.1039/D0CP01893G} -} - -@Article{Zeng_JChemTheoryComput_2023_v19_p1261, - annote = {fit relative energies}, - author = {Jinzhe Zeng and Yujun Tao and Timothy J Giese and Darrin M York}, - title = {{QD{\pi}: A Quantum Deep Potential Interaction Model for - Drug Discovery}}, - journal = {J. Chem. 
Theory Comput.}, - year = 2023, - volume = 19, - issue = 4, - pages = {1261--1275}, - doi = {10.1021/acs.jctc.2c01172}, -} - -@Article{Zeng_PhysRevB_2022_v105_p174109, - annote = {fit density of states}, - author = {Qiyu Zeng and Bo Chen and Xiaoxiang Yu and Shen Zhang and Dongdong - Kang and Han Wang and Jiayu Dai}, - title = {{Towards large-scale and spatiotemporally resolved diagnosis of - electronic density of states by deep learning}}, - journal = {Phys. Rev. B}, - year = 2022, - volume = 105, - issue = 17, - pages = 174109, - doi = {10.1103/PhysRevB.105.174109}, -} - -@Article{Zhang_JChemPhys_2022_v156_p124107, - annote = {DPLR, se_e2_r, hybrid descriptor}, - author = {Linfeng Zhang and Han Wang and Maria Carolina Muniz and Athanassios Z - Panagiotopoulos and Roberto Car and Weinan E}, - title = {{A deep potential model with long-range electrostatic interactions}}, - journal = {J. Chem. Phys.}, - year = 2022, - volume = 156, - issue = 12, - pages = 124107, - doi = {10.1063/5.0083669}, -} - -@article{Zeng_JChemTheoryComput_2021_v17_p6993, - annote= {DPRc}, - title={{Development of Range-Corrected Deep Learning Potentials for Fast, Accurate Quantum Mechanical/molecular Mechanical Simulations of Chemical Reactions in Solution}}, - author={Zeng, Jinzhe and Giese, Timothy J and Ekesan, {\c{S}}{\"o}len and York, Darrin M}, - journal={J. Chem. Theory Comput.}, - year=2021, - volume=17, - issue=11, - pages={6993-7009}, - doi = {10.1021/acs.jctc.1c00201}, -} - -@article{Wang_ApplPhysLett_2019_v114_p244101, - annote = {Interpolation with a pair-wise potential}, - title={{Deep learning inter-atomic potential model for accurate irradiation damage simulations}}, - author={Wang, Hao and Guo, Xun and Zhang, Linfeng and Wang, Han and Xue, Jianming}, - journal={Appl. Phys. 
Lett.}, - volume={114}, - number={24}, - pages={244101}, - year={2019}, - publisher={AIP Publishing LLC}, - doi={10.1063/1.5098061}, -} - -@article{Zhang_PhysRevMater_2019_v3_p23804, - annote = {model deviation}, - title = {{Active learning of uniformly accurate interatomic potentials for materials simulation}}, - author = {Linfeng Zhang and De-Ye Lin and Han Wang and Roberto Car and Weinan E}, - journal = {Phys. Rev. Mater.}, - volume = 3, - issue = 2, - pages = 23804, - year = 2019, - publisher = {American Physical Society}, - doi = {10.1103/PhysRevMaterials.3.023804}, -} - -@article{Lu_JChemTheoryComput_2022_v18_p5555, - annote = {DP Compress}, - author = {Denghui Lu and Wanrun Jiang and Yixiao Chen and Linfeng Zhang and - Weile Jia and Han Wang and Mohan Chen}, - title = {{DP Compress: A Model Compression Scheme for Generating Efficient Deep - Potential Models}}, - journal = {J. Chem. Theory Comput.}, - year = 2022, - volume=18, - issue=9, - pages={5555--5567}, - doi = {10.1021/acs.jctc.2c00102}, -} - -@article{Mo_npjComputMater_2022_v8_p107, - annote = {NVNMD}, - author = {Pinghui Mo and Chang Li and Dan Zhao and Yujia Zhang and Mengchao Shi - and Junhua Li and Jie Liu}, - title = {{Accurate and efficient molecular dynamics based on machine learning - and non von Neumann architecture}}, - journal = {npj Comput. Mater.}, - year = 2022, - volume = 8, - issue = 1, - pages = 107, - doi = {10.1038/s41524-022-00773-z}, -} - -@article{Zeng_EnergyFuels_2021_v35_p762, - annote = {relative or atomic model deviation}, - author = {Jinzhe Zeng and Linfeng Zhang and Han Wang and Tong Zhu}, - title = {{Exploring the Chemical Space of Linear Alkane Pyrolysis via Deep Potential GENerator}}, - journal = {Energy \& Fuels}, - volume = 35, - number = 1, - pages = {762--769}, - year = 2021, - doi = {10.1021/acs.energyfuels.0c03211}, -} +The proposed feature of each article is described in the "annote" field. 
+Please cite an article if any feature is used +@article{Wang_ComputPhysCommun_2018_v228_p178, + annote = {general purpose}, + author = {Wang, Han and Zhang, Linfeng and Han, Jiequn and E, Weinan}, + doi = {10.1016/j.cpc.2018.03.016}, + year = 2018, + month = {jul}, + publisher = {Elsevier {BV}}, + volume = 228, + journal = {Comput. Phys. Comm.}, + title = { + {DeePMD-kit: A deep learning package for many-body potential energy + representation and molecular dynamics} + }, + pages = {178--184}, +} + +@article{Zeng_JChemPhys_2023_v159_p054801, + annote = {general purpose}, + title = {{DeePMD-kit v2: A software package for deep potential models}}, + author = { + Jinzhe Zeng and Duo Zhang and Denghui Lu and Pinghui Mo and Zeyu Li and + Yixiao Chen and Mari{\'a}n Rynik and Li'ang Huang and Ziyao Li and Shaochen + Shi and Yingze Wang and Haotian Ye and Ping Tuo and Jiabin Yang and Ye Ding + and Yifan Li and Davide Tisi and Qiyu Zeng and Han Bao and Yu Xia and + Jiameng Huang and Koki Muraoka and Yibo Wang and Junhan Chang and Fengbo + Yuan and Sigbj{\o}rn L{\o}land Bore and Chun Cai and Yinnian Lin and Bo + Wang and Jiayan Xu and Jia-Xin Zhu and Chenxing Luo and Yuzhi Zhang and + Rhys E A Goodall and Wenshuo Liang and Anurag Kumar Singh and Sikai Yao and + Jingchao Zhang and Renata Wentzcovitch and Jiequn Han and Jie Liu and Weile + Jia and Darrin M York and Weinan E and Roberto Car and Linfeng Zhang and + Han Wang + }, + journal = {J. Chem. Phys.}, + volume = 159, + issue = 5, + year = 2023, + pages = 054801, + doi = {10.1063/5.0155600}, +} + +@article{Lu_CompPhysCommun_2021_v259_p107624, + annote = {GPU support}, + title = { + {86 PFLOPS Deep Potential Molecular Dynamics simulation of 100 million + atoms with ab initio accuracy} + }, + author = { + Lu, Denghui and Wang, Han and Chen, Mohan and Lin, Lin and Car, Roberto and + E, Weinan and Jia, Weile and Zhang, Linfeng + }, + journal = {Comput. Phys.
Comm.}, + volume = 259, + pages = 107624, + year = 2021, + publisher = {Elsevier}, + doi = {10.1016/j.cpc.2020.107624}, +} + +@article{Zhang_PhysRevLett_2018_v120_p143001, + annote = {local frame (loc\_frame)}, + author = {Linfeng Zhang and Jiequn Han and Han Wang and Roberto Car and Weinan E}, + journal = {Phys. Rev. Lett.}, + number = 14, + pages = 143001, + publisher = {APS}, + title = { + {Deep potential molecular dynamics: a scalable model with the accuracy of + quantum mechanics} + }, + volume = 120, + year = 2018, + doi = {10.1103/PhysRevLett.120.143001}, +} + +@incollection{Zhang_BookChap_NIPS_2018_v31_p4436, + annote = {DeepPot-SE (se\_e2\_a, se\_e2\_r, se\_e3, se\_atten)}, + title = { + {End-to-end Symmetry Preserving Inter-atomic Potential Energy Model for + Finite and Extended Systems} + }, + author = { + Zhang, Linfeng and Han, Jiequn and Wang, Han and Saidi, Wissam and Car, + Roberto and E, Weinan + }, + booktitle = {Advances in Neural Information Processing Systems 31}, + editor = { + S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. + Cesa-Bianchi and R. Garnett + }, + pages = {4436--4446}, + year = 2018, + publisher = {Curran Associates, Inc.}, + url = {https://dl.acm.org/doi/10.5555/3327345.3327356}, +} + +@article{Wang_NuclFusion_2022_v62_p126013, + annote = {three-body embedding DeepPot-SE (se\_e3)}, + author = {Xiaoyang Wang and Yinan Wang and Linfeng Zhang and Fuzhi Dai and Han Wang}, + title = { + {A tungsten deep neural-network potential for simulating mechanical + property degradation under fusion service environment} + }, + journal = {Nucl. 
Fusion}, + year = 2022, + volume = 62, + issue = 12, + pages = 126013, + doi = {10.1088/1741-4326/ac888b}, +} + +@article{Zhang_NpjComputMater_2024_v10_p94, + annote = {DPA-1, attention-based descriptor}, + author = { + Duo Zhang and Hangrui Bi and Fu-Zhi Dai and Wanrun Jiang and Xinzijian Liu + and Linfeng Zhang and Han Wang + }, + title = { + {Pretraining of attention-based deep learning potential model for molecular + simulation} + }, + journal = {npj Comput. Mater.}, + year = 2024, + volume = 10, + issue = 1, + pages = 94, + doi = {10.1038/s41524-024-01278-7}, +} + +@misc{Zhang_2023_DPA2, + annote = {DPA-2}, + author = { + Duo Zhang and Xinzijian Liu and Xiangyu Zhang and Chengqian Zhang and Chun + Cai and Hangrui Bi and Yiming Du and Xuejian Qin and Jiameng Huang and + Bowen Li and Yifan Shan and Jinzhe Zeng and Yuzhi Zhang and Siyuan Liu and + Yifan Li and Junhan Chang and Xinyan Wang and Shuo Zhou and Jianchuan Liu + and Xiaoshan Luo and Zhenyu Wang and Wanrun Jiang and Jing Wu and Yudi Yang + and Jiyuan Yang and Manyi Yang and Fu-Qiang Gong and Linshuang Zhang and + Mengchao Shi and Fu-Zhi Dai and Darrin M. York and Shi Liu and Tong Zhu and + Zhicheng Zhong and Jian Lv and Jun Cheng and Weile Jia and Mohan Chen and + Guolin Ke and Weinan E and Linfeng Zhang and Han Wang + }, + title = { + {DPA-2: Towards a universal large atomic model for molecular and material + simulation} + }, + publisher = {arXiv}, + year = 2023, + doi = {10.48550/arXiv.2312.15492}, +} + +@article{Zhang_PhysPlasmas_2020_v27_p122704, + annote = {frame-specific parameters (e.g. electronic temperature)}, + author = { + Zhang, Yuzhi and Gao, Chang and Liu, Qianrui and Zhang, Linfeng and Wang, + Han and Chen, Mohan + }, + title = { + {Warm dense matter simulation via electron temperature dependent deep + potential molecular dynamics} + }, + journal = {Phys.
Plasmas}, + volume = 27, + number = 12, + pages = 122704, + year = 2020, + month = 12, + doi = {10.1063/5.0023265}, +} + +@misc{Zeng_2023_TTMDPMD, + annote = {atom-specific parameter (e.g. electron temperature)}, + author = { + Zeng, Qiyu and Chen, Bo and Zhang, Shen and Kang, Dongdong and Wang, Han + and Yu, Xiaoxiang and Dai, Jiayu + }, + title = {{Full-scale ab initio simulations of laser-driven atomistic dynamics}}, + publisher = {arXiv}, + year = 2023, + doi = {10.48550/arXiv.2308.13863}, +} + +@article{Zhang_PhysRevB_2020_v102_p41121, + annote = {fit dipole}, + title = {{Deep neural network for the dielectric response of insulators}}, + author = { + Zhang, Linfeng and Chen, Mohan and Wu, Xifan and Wang, Han and E, Weinan + and Car, Roberto + }, + journal = {Phys. Rev. B}, + volume = 102, + number = 4, + pages = {041121}, + year = 2020, + publisher = {APS}, + doi = {10.1103/PhysRevB.102.041121}, +} + +@article{Sommers_PhysChemChemPhys_2020_v22_p10592, + annote = {fit polarizability}, + title = { + {Raman spectrum and polarizability of liquid water from deep neural + networks} + }, + author = { + Sommers, Grace M and Andrade, Marcos F Calegari and Zhang, Linfeng and + Wang, Han and Car, Roberto + }, + journal = {Phys. Chem. Chem. Phys.}, + volume = 22, + number = 19, + pages = {10592--10602}, + year = 2020, + publisher = {Royal Society of Chemistry}, + doi = {10.1039/D0CP01893G}, +} + +@article{Zeng_JChemTheoryComput_2023_v19_p1261, + annote = {fit relative energies}, + author = {Jinzhe Zeng and Yujun Tao and Timothy J Giese and Darrin M York}, + title = {{QD{\pi}: A Quantum Deep Potential Interaction Model for Drug Discovery}}, + journal = {J. Chem. 
Theory Comput.}, + year = 2023, + volume = 19, + issue = 4, + pages = {1261--1275}, + doi = {10.1021/acs.jctc.2c01172}, +} + +@article{Zeng_PhysRevB_2022_v105_p174109, + annote = {fit density of states}, + author = { + Qiyu Zeng and Bo Chen and Xiaoxiang Yu and Shen Zhang and Dongdong Kang and + Han Wang and Jiayu Dai + }, + title = { + {Towards large-scale and spatiotemporally resolved diagnosis of electronic + density of states by deep learning} + }, + journal = {Phys. Rev. B}, + year = 2022, + volume = 105, + issue = 17, + pages = 174109, + doi = {10.1103/PhysRevB.105.174109}, +} + +@article{Zhang_JChemPhys_2022_v156_p124107, + annote = {DPLR, se\_e2\_r, hybrid descriptor}, + author = { + Linfeng Zhang and Han Wang and Maria Carolina Muniz and Athanassios Z + Panagiotopoulos and Roberto Car and Weinan E + }, + title = {{A deep potential model with long-range electrostatic interactions}}, + journal = {J. Chem. Phys.}, + year = 2022, + volume = 156, + issue = 12, + pages = 124107, + doi = {10.1063/5.0083669}, +} + +@article{Zeng_JChemTheoryComput_2021_v17_p6993, + annote = {DPRc}, + title = { + {Development of Range-Corrected Deep Learning Potentials for Fast, Accurate + Quantum Mechanical/molecular Mechanical Simulations of Chemical Reactions + in Solution} + }, + author = { + Zeng, Jinzhe and Giese, Timothy J and Ekesan, {\c{S}}{\"o}len and York, + Darrin M + }, + journal = {J. Chem. Theory Comput.}, + year = 2021, + volume = 17, + issue = 11, + pages = {6993--7009}, + doi = {10.1021/acs.jctc.1c00201}, +} + +@article{Wang_ApplPhysLett_2019_v114_p244101, + annote = {Interpolation with a pair-wise potential}, + title = { + {Deep learning inter-atomic potential model for accurate irradiation damage + simulations} + }, + author = {Wang, Hao and Guo, Xun and Zhang, Linfeng and Wang, Han and Xue, Jianming}, + journal = {Appl. Phys. 
Lett.}, + volume = 114, + number = 24, + pages = 244101, + year = 2019, + publisher = {AIP Publishing LLC}, + doi = {10.1063/1.5098061}, +} + +@article{Zhang_PhysRevMater_2019_v3_p23804, + annote = {model deviation}, + title = { + {Active learning of uniformly accurate interatomic potentials for materials + simulation} + }, + author = {Linfeng Zhang and De-Ye Lin and Han Wang and Roberto Car and Weinan E}, + journal = {Phys. Rev. Mater.}, + volume = 3, + issue = 2, + pages = 23804, + year = 2019, + publisher = {American Physical Society}, + doi = {10.1103/PhysRevMaterials.3.023804}, +} + +@article{Lu_JChemTheoryComput_2022_v18_p5555, + annote = {DP Compress}, + author = { + Denghui Lu and Wanrun Jiang and Yixiao Chen and Linfeng Zhang and Weile Jia + and Han Wang and Mohan Chen + }, + title = { + {DP Compress: A Model Compression Scheme for Generating Efficient Deep + Potential Models} + }, + journal = {J. Chem. Theory Comput.}, + year = 2022, + volume = 18, + issue = 9, + pages = {5555--5567}, + doi = {10.1021/acs.jctc.2c00102}, +} + +@article{Mo_npjComputMater_2022_v8_p107, + annote = {NVNMD}, + author = { + Pinghui Mo and Chang Li and Dan Zhao and Yujia Zhang and Mengchao Shi and + Junhua Li and Jie Liu + }, + title = { + {Accurate and efficient molecular dynamics based on machine learning and + non von Neumann architecture} + }, + journal = {npj Comput. 
Mater.}, + year = 2022, + volume = 8, + issue = 1, + pages = 107, + doi = {10.1038/s41524-022-00773-z}, +} + +@article{Zeng_EnergyFuels_2021_v35_p762, + annote = {relative or atomic model deviation}, + author = {Jinzhe Zeng and Linfeng Zhang and Han Wang and Tong Zhu}, + title = { + {Exploring the Chemical Space of Linear Alkane Pyrolysis via Deep Potential + GENerator} + }, + journal = {Energy \& Fuels}, + volume = 35, + number = 1, + pages = {762--769}, + year = 2021, + doi = {10.1021/acs.energyfuels.0c03211}, +} diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index e43e23beb6..cb08609c2b 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -7,6 +7,7 @@ Welcome to [DeePMD-kit](https://github.com/deepmodeling/deepmd-kit)! You can either make a code contribution, help improve our document or offer help to other users. Your help is always appreciated. Come and have fun! ### Code contribution + You can start from any one of the following items to help improve deepmd-kit - Smash a bug @@ -18,6 +19,7 @@ See [here](#before-you-contribute) for some before-hand heads-up. See [here](#how-to-contribute) to learn how to contribute. ### Document improvement + You can start from any one of the following items to help improve [DeePMD-kit Docs](https://deepmd.readthedocs.io/en/latest/?badge=latest): - Fix typos or format (punctuation, space, indentation, code block, etc.) 
@@ -26,21 +28,27 @@ You can start from any one of the following items to help improve [DeePMD-kit Do - Translate docs changes from English to Chinese ### Offer help + You can help other users of deepmd-kit in the following way - Submit, reply to, and resolve [issues](https://github.com/deepmodeling/deepmd-kit/issues) - (Advanced) Review Pull Requests created by others ## Before you contribute + ### Overview of DeePMD-kit + Currently, we maintain two main branch: + - master: stable branch with version tag -- devel : branch for developers +- devel : branch for developers ### Developer guide -See [here](doc/development/index.md) for coding conventions, API and other needs-to-know of the code. + +See [documentation](https://deepmd.readthedocs.io/) for coding conventions, API and other needs-to-know of the code. ## How to contribute + Please perform the following steps to create your Pull Request to this repository. If don't like to use commands, you can also use [GitHub Desktop](https://desktop.github.com/), which is easier to get started. Go to [git documentation](https://git-scm.com/doc) if you want to really master git. ### Step 1: Fork the repository @@ -51,79 +59,82 @@ Please perform the following steps to create your Pull Request to this repositor ### Step 2: Clone the forked repository to local storage and set configurations 1. Clone your own repo, not the public repo (from deepmodeling) ! And change the branch to devel. - ```bash - git clone https://github.com/$username/deepmd-kit.git - # Replace `$username` with your GitHub ID - git checkout devel - ``` + ```bash + git clone https://github.com/$username/deepmd-kit.git + # Replace `$username` with your GitHub ID + + git checkout devel + ``` 2. Add deepmodeling's repo as your remote repo, we can name it "upstream". And fetch upstream's latest codes to your workstation. 
- ```bash - git remote add upstream https://github.com/deepmodeling/deepmd-kit.git - # After you add a remote repo, your local repo will be automatically named "origin". - git fetch upstream + ```bash + git remote add upstream https://github.com/deepmodeling/deepmd-kit.git + # After you add a remote repo, your local repo will be automatically named "origin". - # If your current codes are behind the latest codes, you should merge latest codes first. - # Notice you should merge from "devel"! - git merge upstream/devel - ``` + git fetch upstream + + # If your current codes are behind the latest codes, you should merge latest codes first. + # Notice you should merge from "devel"! + git merge upstream/devel + ``` 3. Modify your codes and design unit tests. 4. Commit your changes - ```bash - git status # Checks the local status - git add ... # Adds the file(s) you want to commit. If you want to commit all changes, you can directly use `git add.` - git commit -m "commit-message: update the xx" - ``` + + ```bash + git status # Checks the local status + git add ... # Adds the file(s) you want to commit. If you want to commit all changes, you can directly use `git add.` + git commit -m "commit-message: update the xx" + ``` 5. Push the changed codes to your original repo on github. - ```bash - git push origin devel - ``` + ```bash + git push origin devel + ``` ### Alternatively: Create a new branch 1. Get your local master up-to-date with upstream/master. - ```bash - cd $working_dir/deepmd-kit - git fetch upstream - git checkout master - git rebase upstream/master - ``` + ```bash + cd $working_dir/deepmd-kit + git fetch upstream + git checkout master + git rebase upstream/master + ``` 2. Create a new branch based on the master branch. - ```bash - git checkout -b new-branch-name - ``` + ```bash + git checkout -b new-branch-name + ``` 3. Modify your codes and design unit tests. 4. Commit your changes - ```bash - git status # Checks the local status - git add ... 
# Adds the file(s) you want to commit. If you want to commit all changes, you can directly use `git add.` - git commit -m "commit-message: update the xx" - ``` + ```bash + git status # Checks the local status + git add ... # Adds the file(s) you want to commit. If you want to commit all changes, you can directly use `git add.` + git commit -m "commit-message: update the xx" + ``` 5. Keep your branch in sync with upstream/master - ```bash - # While on your new branch - git fetch upstream - git rebase upstream/master - ``` + ```bash + # While on your new branch + git fetch upstream + git rebase upstream/master + ``` 6. Push your changes to the remote - ```bash - git push -u origin new-branch-name # "-u" is used to track the remote branch from origin - ``` + ```bash + git push -u origin new-branch-name # "-u" is used to track the remote branch from origin + ``` ### Step 3: Create a pull request @@ -133,4 +144,5 @@ Please perform the following steps to create your Pull Request to this repositor Now, your PR is successfully submitted! After this PR is merged, you will automatically become a contributor to DeePMD-kit. 
## Contact us + E-mail: contact@deepmodeling.org diff --git a/README.md b/README.md index 81fdead098..18bdfd6560 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,9 @@ [DeePMD-kit logo](./doc/logo.md) --------------------------------------------------------------------------------- +--- + +# DeePMD-kit -DeePMD-kit Manual -======== [![GitHub release](https://img.shields.io/github/release/deepmodeling/deepmd-kit.svg?maxAge=86400)](https://github.com/deepmodeling/deepmd-kit/releases) [![offline packages](https://img.shields.io/github/downloads/deepmodeling/deepmd-kit/total?label=offline%20packages)](https://github.com/deepmodeling/deepmd-kit/releases) [![conda-forge](https://img.shields.io/conda/dn/conda-forge/deepmd-kit?color=red&label=conda-forge&logo=conda-forge)](https://anaconda.org/conda-forge/deepmd-kit) @@ -11,168 +11,94 @@ [![docker pull](https://img.shields.io/docker/pulls/deepmodeling/deepmd-kit)](https://hub.docker.com/r/deepmodeling/deepmd-kit) [![Documentation Status](https://readthedocs.org/projects/deepmd/badge/)](https://deepmd.readthedocs.io/) -# Table of contents -- [About DeePMD-kit](#about-deepmd-kit) - - [Highlights in v2.0](#highlights-in-deepmd-kit-v2.0) - - [Highlighted features](#highlighted-features) - - [License and credits](#license-and-credits) - - [Deep Potential in a nutshell](#deep-potential-in-a-nutshell) -- [Download and install](#download-and-install) -- [Use DeePMD-kit](#use-deepmd-kit) -- [Code structure](#code-structure) -- [Troubleshooting](#troubleshooting) - -# About DeePMD-kit +## About DeePMD-kit + DeePMD-kit is a package written in Python/C++, designed to minimize the effort required to build deep learning-based model of interatomic potential energy and force field and to perform molecular dynamics (MD). This brings new hopes to addressing the accuracy-versus-efficiency dilemma in molecular simulations. 
Applications of DeePMD-kit span from finite molecules to extended systems and from metallic systems to chemically bonded systems. For more information, check the [documentation](https://deepmd.readthedocs.io/). -# Highlights in DeePMD-kit v2.0 -* [Model compression](doc/freeze/compress.md). Accelerate the efficiency of model inference 4-15 times. -* [New descriptors](doc/model/overall.md). Including [`se_e2_r`](doc/model/train-se-e2-r.md) and [`se_e3`](doc/model/train-se-e3.md). -* [Hybridization of descriptors](doc/model/train-hybrid.md). Hybrid descriptor constructed from the concatenation of several descriptors. -* [Atom type embedding](doc/model/train-se-e2-a-tebd.md). Enable atom-type embedding to decline training complexity and refine performance. -* Training and inference of the dipole (vector) and polarizability (matrix). -* Split of training and validation dataset. -* Optimized training on GPUs. - -## Highlighted features -* **interfaced with TensorFlow**, one of the most popular deep learning frameworks, making the training process highly automatic and efficient, in addition, Tensorboard can be used to visualize training procedures. -* **interfaced with high-performance classical MD and quantum (path-integral) MD packages**, i.e., LAMMPS and i-PI, respectively. -* **implements the Deep Potential series models**, which have been successfully applied to finite and extended systems including organic molecules, metals, semiconductors, insulators, etc. -* **implements MPI and GPU supports**, making it highly efficient for high-performance parallel and distributed computing. -* **highly modularized**, easy to adapt to different descriptors for deep learning-based potential energy models. - -## License and credits +### Highlighted features + +- **interfaced with multiple backends**, including TensorFlow, PyTorch, and JAX, the most popular deep learning frameworks, making the training process highly automatic and efficient. 
+- **interfaced with high-performance classical MD and quantum (path-integral) MD packages**, including LAMMPS, i-PI, AMBER, CP2K, GROMACS, OpenMM, and ABACUS. +- **implements the Deep Potential series models**, which have been successfully applied to finite and extended systems, including organic molecules, metals, semiconductors, insulators, etc. +- **implements MPI and GPU supports**, making it highly efficient for high-performance parallel and distributed computing. +- **highly modularized**, easy to adapt to different descriptors for deep learning-based potential energy models. + +### License and credits + The project DeePMD-kit is licensed under [GNU LGPLv3.0](./LICENSE). If you use this code in any future publications, please cite the following publications for general purpose: + - Han Wang, Linfeng Zhang, Jiequn Han, and Weinan E. "DeePMD-kit: A deep learning package for many-body potential energy representation and molecular dynamics." Computer Physics Communications 228 (2018): 178-184. -[![doi:10.1016/j.cpc.2018.03.016](https://img.shields.io/badge/DOI-10.1016%2Fj.cpc.2018.03.016-blue)](https://doi.org/10.1016/j.cpc.2018.03.016) -[![Citations](https://citations.njzjz.win/10.1016/j.cpc.2018.03.016)](https://badge.dimensions.ai/details/doi/10.1016/j.cpc.2018.03.016) + [![doi:10.1016/j.cpc.2018.03.016](https://img.shields.io/badge/DOI-10.1016%2Fj.cpc.2018.03.016-blue)](https://doi.org/10.1016/j.cpc.2018.03.016) + [![Citations](https://citations.njzjz.win/10.1016/j.cpc.2018.03.016)](https://badge.dimensions.ai/details/doi/10.1016/j.cpc.2018.03.016) - Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen, Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. 
Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang. "DeePMD-kit v2: A software package for deep potential models." J. Chem. Phys. 159 (2023): 054801. -[![doi:10.1063/5.0155600](https://img.shields.io/badge/DOI-10.1063%2F5.0155600-blue)](https://doi.org/10.1063/5.0155600) -[![Citations](https://citations.njzjz.win/10.1063/5.0155600)](https://badge.dimensions.ai/details/doi/10.1063/5.0155600) + [![doi:10.1063/5.0155600](https://img.shields.io/badge/DOI-10.1063%2F5.0155600-blue)](https://doi.org/10.1063/5.0155600) + [![Citations](https://citations.njzjz.win/10.1063/5.0155600)](https://badge.dimensions.ai/details/doi/10.1063/5.0155600) In addition, please follow [the bib file](CITATIONS.bib) to cite the methods you used. -## Deep Potential in a nutshell -The goal of Deep Potential is to employ deep learning techniques and realize an inter-atomic potential energy model that is general, accurate, computationally efficient and scalable. The key component is to respect the extensive and symmetry-invariant properties of a potential energy model by assigning a local reference frame and a local environment to each atom. Each environment contains a finite number of atoms, whose local coordinates are arranged in a symmetry-preserving way. These local coordinates are then transformed, through a sub-network, to so-called *atomic energy*. Summing up all the atomic energies gives the potential energy of the system. +### Highlights in major versions + +#### Initial version -The initial proof of concept is in the [Deep Potential][1] paper, which employed an approach that was devised to train the neural network model with the potential energy only. With typical *ab initio* molecular dynamics (AIMD) datasets this is insufficient to reproduce the trajectories. The Deep Potential Molecular Dynamics ([DeePMD][2]) model overcomes this limitation. 
In addition, the learning process in DeePMD improves significantly over the Deep Potential method thanks to the introduction of a flexible family of loss functions. The NN potential constructed in this way reproduces accurately the AIMD trajectories, both classical and quantum (path integral), in extended and finite systems, at a cost that scales linearly with system size and is always several orders of magnitude lower than that of equivalent AIMD simulations. +The goal of Deep Potential is to employ deep learning techniques and realize an inter-atomic potential energy model that is general, accurate, computationally efficient and scalable. The key component is to respect the extensive and symmetry-invariant properties of a potential energy model by assigning a local reference frame and a local environment to each atom. Each environment contains a finite number of atoms, whose local coordinates are arranged in a symmetry-preserving way. These local coordinates are then transformed, through a sub-network, to so-called _atomic energy_. Summing up all the atomic energies gives the potential energy of the system. + +The initial proof of concept is in the [Deep Potential][1] paper, which employed an approach that was devised to train the neural network model with the potential energy only. With typical _ab initio_ molecular dynamics (AIMD) datasets this is insufficient to reproduce the trajectories. The Deep Potential Molecular Dynamics ([DeePMD][2]) model overcomes this limitation. In addition, the learning process in DeePMD improves significantly over the Deep Potential method thanks to the introduction of a flexible family of loss functions. The NN potential constructed in this way reproduces accurately the AIMD trajectories, both classical and quantum (path integral), in extended and finite systems, at a cost that scales linearly with system size and is always several orders of magnitude lower than that of equivalent AIMD simulations. 
Although highly efficient, the original Deep Potential model satisfies the extensive and symmetry-invariant properties of a potential energy model at the price of introducing discontinuities in the model. This has negligible influence on a trajectory from canonical sampling but might not be sufficient for calculations of dynamical and mechanical properties. These points motivated us to develop the Deep Potential-Smooth Edition ([DeepPot-SE][3]) model, which replaces the non-smooth local frame with a smooth and adaptive embedding network. DeepPot-SE shows great ability in modeling many kinds of systems that are of interest in the fields of physics, chemistry, biology, and materials science. In addition to building up potential energy models, DeePMD-kit can also be used to build up coarse-grained models. In these models, the quantity that we want to parameterize is the free energy, or the coarse-grained potential, of the coarse-grained particles. See the [DeePCG paper][4] for more details. -See [our latest paper](https://doi.org/10.48550/arXiv.2304.09409) for details of all features. - -# Download and install - -Please follow our [GitHub](https://github.com/deepmodeling/deepmd-kit) webpage to download the [latest released version](https://github.com/deepmodeling/deepmd-kit/tree/master) and [development version](https://github.com/deepmodeling/deepmd-kit/tree/devel). - -DeePMD-kit offers multiple installation methods. It is recommended to use easy methods like [offline packages](doc/install/easy-install.md#offline-packages), [conda](doc/install/easy-install.md#with-conda) and [docker](doc/install/easy-install.md#with-docker). - -One may manually install DeePMD-kit by following the instructions on [installing the Python interface](doc/install/install-from-source.md#install-the-python-interface) and [installing the C++ interface](doc/install/install-from-source.md#install-the-c-interface). 
The C++ interface is necessary when using DeePMD-kit with LAMMPS, i-PI or GROMACS. - - -# Use DeePMD-kit - -A quick start on using DeePMD-kit can be found [here](doc/getting-started/quick_start.ipynb). - -A full [document](doc/train/train-input-auto.rst) on options in the training input script is available. - -# Advanced - -- [Installation](doc/install/index.md) - - [Easy install](doc/install/easy-install.md) - - [Install from source code](doc/install/install-from-source.md) - - [Install from pre-compiled C library](doc/install/install-from-c-library.md) - - [Install LAMMPS](doc/install/install-lammps.md) - - [Install i-PI](doc/install/install-ipi.md) - - [Install GROMACS](doc/install/install-gromacs.md) - - [Building conda packages](doc/install/build-conda.md) - - [Install Node.js interface](doc/install/install-nodejs.md) - - [Easy install the latest development version](doc/install/easy-install-dev.md) -- [Data](doc/data/index.md) - - [System](doc/data/system.md) - - [Formats of a system](doc/data/data-conv.md) - - [Prepare data with dpdata](doc/data/dpdata.md) -- [Model](doc/model/index.md) - - [Overall](doc/model/overall.md) - - [Descriptor `"se_e2_a"`](doc/model/train-se-e2-a.md) - - [Descriptor `"se_e2_r"`](doc/model/train-se-e2-r.md) - - [Descriptor `"se_e3"`](doc/model/train-se-e3.md) - - [Descriptor `"se_atten"`](doc/model/train-se-atten.md) - - [Descriptor `"se_atten_v2"`](doc/model/train-se-atten.md#descriptor-se_atten_v2) - - [Descriptor `"hybrid"`](doc/model/train-hybrid.md) - - [Descriptor `sel`](doc/model/sel.md) - - [Fit energy](doc/model/train-energy.md) - - [Fit spin energy](doc/model/train-energy-spin.md) - - [Fit `tensor` like `Dipole` and `Polarizability`](doc/model/train-fitting-tensor.md) - - [Fit electronic density of states (DOS)](doc/model/train-fitting-dos.md) - - [Train a Deep Potential model using `type embedding` approach](doc/model/train-se-e2-a-tebd.md) - - [Deep potential long-range](doc/model/dplr.md) - - [Deep Potential - Range 
Correction (DPRc)](doc/model/dprc.md) - - [Linear model](doc/model/linear.md) - - [Interpolation or combination with a pairwise potential](doc/model/pairtab.md) -- [Training](doc/train/index.md) - - [Training a model](doc/train/training.md) - - [Advanced options](doc/train/training-advanced.md) - - [Parallel training](doc/train/parallel-training.md) - - [Multi-task training](doc/train/multi-task-training.md) - - [TensorBoard Usage](doc/train/tensorboard.md) - - [Known limitations of using GPUs](doc/train/gpu-limitations.md) - - [Training Parameters](doc/train-input-auto.rst) -- [Freeze and Compress](doc/freeze/index.rst) - - [Freeze a model](doc/freeze/freeze.md) - - [Compress a model](doc/freeze/compress.md) -- [Test](doc/test/index.rst) - - [Test a model](doc/test/test.md) - - [Calculate Model Deviation](doc/test/model-deviation.md) -- [Inference](doc/inference/index.rst) - - [Python interface](doc/inference/python.md) - - [C++ interface](doc/inference/cxx.md) - - [Node.js interface](doc/inference/nodejs.md) -- [Integrate with third-party packages](doc/third-party/index.rst) - - [Use deep potential with ASE](doc/third-party/ase.md) - - [Run MD with LAMMPS](doc/third-party/lammps-command.md) - - [Run path-integral MD with i-PI](doc/third-party/ipi.md) - - [Run MD with GROMACS](doc/third-party/gromacs.md) - - [Interfaces out of DeePMD-kit](doc/third-party/out-of-deepmd-kit.md) -- [Use NVNMD](doc/nvnmd/index.md) - -# Code structure +#### v1 + +- Code refactor to make it highly modularized. +- GPU support for descriptors. + +#### v2 + +- Model compression. Accelerate the efficiency of model inference 4-15 times. +- New descriptors. Including `se_e2_r`, `se_e3`, and `se_atten` (DPA-1). +- Hybridization of descriptors. Hybrid descriptor constructed from the concatenation of several descriptors. +- Atom type embedding. Enable atom-type embedding to decline training complexity and refine performance. 
+- Training and inference of the dipole (vector) and polarizability (matrix). +- Split of training and validation dataset. +- Optimized training on GPUs, including CUDA and ROCm. +- Non-von-Neumann. +- C API to interface with the third-party packages. + +See [our latest paper](https://doi.org/10.1063/5.0155600) for details of all features until v2.2.3. + +#### v3 + +- Multiple backends supported. Add PyTorch and JAX backends. +- The DPA-2 model. +- Plugin mechanisms for external models. + +## Install and use DeePMD-kit + +Please read the [online documentation](https://deepmd.readthedocs.io/) for how to install and use DeePMD-kit. + +## Code structure The code is organized as follows: -* `data/raw`: tools manipulating the raw data files. -* `examples`: examples. -* `deepmd`: DeePMD-kit python modules. -* `source/api_cc`: source code of DeePMD-kit C++ API. -* `source/ipi`: source code of i-PI client. -* `source/lib`: source code of DeePMD-kit library. -* `source/lmp`: source code of Lammps module. -* `source/gmx`: source code of Gromacs plugin. -* `source/op`: TensorFlow op implementation. working with the library. - - -# Troubleshooting - -- [Model compatibility](doc/troubleshooting/model_compatability.md) -- [Installation](doc/troubleshooting/installation.md) -- [The temperature undulates violently during the early stages of MD](doc/troubleshooting/md_energy_undulation.md) -- [MD: cannot run LAMMPS after installing a new version of DeePMD-kit](doc/troubleshooting/md_version_compatibility.md) -- [Do we need to set rcut < half boxsize?](doc/troubleshooting/howtoset_rcut.md) -- [How to set sel?](doc/troubleshooting/howtoset_sel.md) -- [How to control the parallelism of a job?](doc/troubleshooting/howtoset_num_nodes.md) -- [How to tune Fitting/embedding-net size?](doc/troubleshooting/howtoset_netsize.md) -- [Why does a model have low precision?](doc/troubleshooting/precision.md) +- `examples`: examples. +- `deepmd`: DeePMD-kit python modules. 
+- `source/lib`: source code of the core library. +- `source/op`: Operator (OP) implementation. +- `source/api_cc`: source code of DeePMD-kit C++ API. +- `source/api_c`: source code of the C API. +- `source/nodejs`: source code of the Node.js API. +- `source/ipi`: source code of i-PI client. +- `source/lmp`: source code of LAMMPS module. +- `source/gmx`: source code of Gromacs plugin. # Contributing See [DeePMD-kit Contributing Guide](CONTRIBUTING.md) to become a contributor! 🤓 - [1]: https://arxiv.org/abs/1707.01478 [2]: https://journals.aps.org/prl/abstract/10.1103/PhysRevLett.120.143001 [3]: https://arxiv.org/abs/1805.09003 diff --git a/backend/dp_backend.py b/backend/dp_backend.py index d28afdb239..81c3f20f19 100644 --- a/backend/dp_backend.py +++ b/backend/dp_backend.py @@ -1,11 +1,11 @@ # SPDX-License-Identifier: LGPL-3.0-or-later """A PEP-517 backend to find TensorFlow.""" -from typing import ( - List, -) from scikit_build_core import build as _orig +from .find_pytorch import ( + find_pytorch, +) from .find_tensorflow import ( find_tensorflow, ) @@ -22,7 +22,7 @@ ] -def __dir__() -> List[str]: +def __dir__() -> list[str]: return __all__ @@ -38,11 +38,19 @@ def __dir__() -> List[str]: def get_requires_for_build_wheel( config_settings: dict, -) -> List[str]: - return _orig.get_requires_for_build_wheel(config_settings) + find_tensorflow()[1] +) -> list[str]: + return ( + _orig.get_requires_for_build_wheel(config_settings) + + find_tensorflow()[1] + + find_pytorch()[1] + ) def get_requires_for_build_editable( config_settings: dict, -) -> List[str]: - return _orig.get_requires_for_build_editable(config_settings) + find_tensorflow()[1] +) -> list[str]: + return ( + _orig.get_requires_for_build_editable(config_settings) + + find_tensorflow()[1] + + find_pytorch()[1] + ) diff --git a/backend/dynamic_metadata.py b/backend/dynamic_metadata.py index ab955c3cf8..a66e9a2759 100644 --- a/backend/dynamic_metadata.py +++ b/backend/dynamic_metadata.py @@ -1,10 +1,15 @@ # 
SPDX-License-Identifier: LGPL-3.0-or-later +import sys +from pathlib import ( + Path, +) from typing import ( - Dict, - List, Optional, ) +from .find_pytorch import ( + get_pt_requirement, +) from .find_tensorflow import ( get_tf_requirement, ) @@ -12,80 +17,42 @@ get_argument_from_env, ) +if sys.version_info >= (3, 11): + import tomllib +else: + import tomli as tomllib + __all__ = ["dynamic_metadata"] -def __dir__() -> List[str]: +def __dir__() -> list[str]: return __all__ def dynamic_metadata( field: str, - settings: Optional[Dict[str, object]] = None, -) -> str: + settings: Optional[dict[str, object]] = None, +): assert field in ["optional-dependencies", "entry-points", "scripts"] - _, _, find_libpython_requires, extra_scripts, tf_version = get_argument_from_env() + _, _, find_libpython_requires, extra_scripts, tf_version, pt_version = ( + get_argument_from_env() + ) + with Path("pyproject.toml").open("rb") as f: + pyproject = tomllib.load(f) + if field == "scripts": return { - "dp": "deepmd_utils.main:main", + **pyproject["tool"]["deepmd_build_backend"]["scripts"], **extra_scripts, } elif field == "optional-dependencies": + optional_dependencies = pyproject["tool"]["deepmd_build_backend"][ + "optional-dependencies" + ] + optional_dependencies["lmp"].extend(find_libpython_requires) + optional_dependencies["ipi"].extend(find_libpython_requires) return { - "test": [ - "dpdata>=0.1.9", - "ase", - "pytest", - "pytest-cov", - "pytest-sugar", - "dpgui", - ], - "docs": [ - "sphinx>=3.1.1", - "sphinx_rtd_theme>=1.0.0rc1", - "sphinx_markdown_tables", - "myst-nb>=1.0.0rc0", - "myst-parser>=0.19.2", - "breathe", - "exhale", - "numpydoc", - "ase", - "deepmodeling-sphinx>=0.1.0", - "dargs>=0.3.4", - "sphinx-argparse", - "pygments-lammps", - "sphinxcontrib-bibtex", - ], - "lmp": [ - "lammps~=2023.8.2.2.0", - *find_libpython_requires, - ], - "ipi": [ - "i-PI", - *find_libpython_requires, - ], - "gui": [ - "dpgui", - ], + **optional_dependencies, 
**get_tf_requirement(tf_version), - "cu11": [ - "nvidia-cuda-runtime-cu11", - "nvidia-cublas-cu11", - "nvidia-cufft-cu11", - "nvidia-curand-cu11", - "nvidia-cusolver-cu11", - "nvidia-cusparse-cu11", - "nvidia-cudnn-cu11", - "nvidia-cuda-nvcc-cu11", - ], - "cu12": [ - "nvidia-cuda-runtime-cu12", - "nvidia-cublas-cu12", - "nvidia-cufft-cu12", - "nvidia-curand-cu12", - "nvidia-cusolver-cu12", - "nvidia-cusparse-cu12", - "nvidia-cudnn-cu12", - "nvidia-cuda-nvcc-cu12", - ], + **get_pt_requirement(pt_version), } diff --git a/backend/find_pytorch.py b/backend/find_pytorch.py new file mode 100644 index 0000000000..125fd6a389 --- /dev/null +++ b/backend/find_pytorch.py @@ -0,0 +1,162 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import importlib +import os +import platform +import site +from functools import ( + lru_cache, +) +from importlib.machinery import ( + FileFinder, +) +from importlib.util import ( + find_spec, +) +from pathlib import ( + Path, +) +from sysconfig import ( + get_path, +) +from typing import ( + Optional, + Union, +) + +from packaging.specifiers import ( + SpecifierSet, +) +from packaging.version import ( + Version, +) + + +@lru_cache +def find_pytorch() -> tuple[Optional[str], list[str]]: + """Find PyTorch library. + + Tries to find PyTorch in the order of: + + 1. Environment variable `PYTORCH_ROOT` if set + 2. The current Python environment. + 3. user site packages directory if enabled + 4. system site packages directory (purelib) + + Considering the default PyTorch package still uses old CXX11 ABI, we + cannot install it automatically. + + Returns + ------- + str, optional + PyTorch library path if found. + list of str + PyTorch requirement if not found. Empty if found.
+ """ + if os.environ.get("DP_ENABLE_PYTORCH", "0") == "0": + return None, [] + requires = [] + pt_spec = None + + if (pt_spec is None or not pt_spec) and os.environ.get("PYTORCH_ROOT") is not None: + site_packages = Path(os.environ.get("PYTORCH_ROOT")).parent.absolute() + pt_spec = FileFinder(str(site_packages)).find_spec("torch") + + # get pytorch spec + # note: isolated build will not work for backend + if pt_spec is None or not pt_spec: + pt_spec = find_spec("torch") + + if not pt_spec and site.ENABLE_USER_SITE: + # first search PyTorch from user site-packages before global site-packages + site_packages = site.getusersitepackages() + if site_packages: + pt_spec = FileFinder(site_packages).find_spec("torch") + + if not pt_spec: + # purelib gets site-packages path + site_packages = get_path("purelib") + if site_packages: + pt_spec = FileFinder(site_packages).find_spec("torch") + + # get install dir from spec + try: + pt_install_dir = pt_spec.submodule_search_locations[0] # type: ignore + # AttributeError if pt_spec is None + # TypeError if submodule_search_locations is None + # IndexError if submodule_search_locations is an empty list + except (AttributeError, TypeError, IndexError): + pt_install_dir = None + requires.extend(get_pt_requirement()["torch"]) + return pt_install_dir, requires + + +@lru_cache +def get_pt_requirement(pt_version: str = "") -> dict: + """Get PyTorch requirement when PT is not installed. + + If pt_version is not given and the environment variable `PYTORCH_VERSION` is set, use it as the requirement. + + Parameters + ---------- + pt_version : str, optional + PT version + + Returns + ------- + dict + PyTorch requirement.
+ """ + if pt_version is None: + return {"torch": []} + if ( + os.environ.get("CIBUILDWHEEL", "0") == "1" + and platform.system() == "Linux" + and platform.machine() == "x86_64" + ): + cuda_version = os.environ.get("CUDA_VERSION", "12.2") + if cuda_version == "" or cuda_version in SpecifierSet(">=12,<13"): + # CUDA 12.2, cudnn 9 + pt_version = "2.5.0" + elif cuda_version in SpecifierSet(">=11,<12"): + # CUDA 11.8, cudnn 8 + pt_version = "2.3.1" + else: + raise RuntimeError("Unsupported CUDA version") from None + if pt_version == "": + pt_version = os.environ.get("PYTORCH_VERSION", "") + + return { + "torch": [ + # uv has different local version behaviors, i.e. `==2.3.1` cannot match `==2.3.1+cpu` + # https://github.com/astral-sh/uv/blob/main/PIP_COMPATIBILITY.md#local-version-identifiers + # luckily, .* (prefix matching) defined in PEP 440 can match any local version + # https://peps.python.org/pep-0440/#version-matching + f"torch=={Version(pt_version).base_version}.*" + if pt_version != "" + # https://github.com/pytorch/pytorch/commit/7e0c26d4d80d6602aed95cb680dfc09c9ce533bc + else "torch>=2.1.0" + ], + } + + +@lru_cache +def get_pt_version(pt_path: Optional[Union[str, Path]]) -> str: + """Get PyTorch version from a PyTorch Python library path.
+ + Parameters + ---------- + pt_path : str or Path + PT Python library path + + Returns + ------- + str + version + """ + if pt_path is None or pt_path == "": + return "" + version_file = Path(pt_path) / "version.py" + spec = importlib.util.spec_from_file_location("torch.version", version_file) + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module.__version__ diff --git a/backend/find_tensorflow.py b/backend/find_tensorflow.py index 08a73f7252..1fc3a8a6d9 100644 --- a/backend/find_tensorflow.py +++ b/backend/find_tensorflow.py @@ -17,9 +17,7 @@ get_path, ) from typing import ( - List, Optional, - Tuple, Union, ) @@ -28,8 +26,8 @@ ) -@lru_cache() -def find_tensorflow() -> Tuple[Optional[str], List[str]]: +@lru_cache +def find_tensorflow() -> tuple[Optional[str], list[str]]: """Find TensorFlow library. Tries to find TensorFlow in the order of: @@ -47,15 +45,11 @@ def find_tensorflow() -> Tuple[Optional[str], List[str]]: list of str TensorFlow requirement if not found. Empty if found. 
""" + if os.environ.get("DP_ENABLE_TENSORFLOW", "1") == "0": + return None, [] requires = [] tf_spec = None - if os.environ.get("CIBUILDWHEEL", "0") == "1" and os.environ.get( - "CIBW_BUILD", "" - ).endswith("macosx_arm64"): - # cibuildwheel cross build - site_packages = Path(os.environ.get("RUNNER_TEMP")) / "tensorflow" - tf_spec = FileFinder(str(site_packages)).find_spec("tensorflow") if (tf_spec is None or not tf_spec) and os.environ.get( "TENSORFLOW_ROOT" @@ -87,31 +81,33 @@ def find_tensorflow() -> Tuple[Optional[str], List[str]]: # TypeError if submodule_search_locations are None # IndexError if submodule_search_locations is an empty list except (AttributeError, TypeError, IndexError): + tf_version = "" if os.environ.get("CIBUILDWHEEL", "0") == "1": cuda_version = os.environ.get("CUDA_VERSION", "12.2") if cuda_version == "" or cuda_version in SpecifierSet(">=12,<13"): - # CUDA 12.2 + # CUDA 12.2, cudnn 9 requires.extend( [ - "tensorflow-cpu>=2.15.0rc0; platform_machine=='x86_64' and platform_system == 'Linux'", + "tensorflow-cpu>=2.18.0rc0; platform_machine=='x86_64' and platform_system == 'Linux'", ] ) elif cuda_version in SpecifierSet(">=11,<12"): - # CUDA 11.8 + # CUDA 11.8, cudnn 8 requires.extend( [ "tensorflow-cpu>=2.5.0rc0,<2.15; platform_machine=='x86_64' and platform_system == 'Linux'", ] ) + tf_version = "2.14.1" else: - raise RuntimeError("Unsupported CUDA version") - requires.extend(get_tf_requirement()["cpu"]) + raise RuntimeError("Unsupported CUDA version") from None + requires.extend(get_tf_requirement(tf_version)["cpu"]) # setuptools will re-find tensorflow after installing setup_requires tf_install_dir = None return tf_install_dir, requires -@lru_cache() +@lru_cache def get_tf_requirement(tf_version: str = "") -> dict: """Get TensorFlow requirement (CPU) when TF is not installed. @@ -127,6 +123,12 @@ def get_tf_requirement(tf_version: str = "") -> dict: dict TensorFlow requirement, including cpu and gpu. 
""" + if tf_version is None: + return { + "cpu": [], + "gpu": [], + "mpi": [], + } if tf_version == "": tf_version = os.environ.get("TENSORFLOW_VERSION", "") @@ -134,6 +136,9 @@ def get_tf_requirement(tf_version: str = "") -> dict: extra_select = {} if not (tf_version == "" or tf_version in SpecifierSet(">=2.12", prereleases=True)): extra_requires.append("protobuf<3.20") + # keras 3 is not compatible with tf.compat.v1 + # 2024/04/24: deepmd.tf doesn't import tf.keras any more + if tf_version == "" or tf_version in SpecifierSet(">=1.15", prereleases=True): extra_select["mpi"] = [ "horovod", @@ -148,12 +153,16 @@ def get_tf_requirement(tf_version: str = "") -> dict: "tensorflow-cpu; platform_machine!='aarch64' and (platform_machine!='arm64' or platform_system != 'Darwin')", "tensorflow; platform_machine=='aarch64' or (platform_machine=='arm64' and platform_system == 'Darwin')", # https://github.com/tensorflow/tensorflow/issues/61830 - "tensorflow-cpu<2.15; platform_system=='Windows'", + "tensorflow-cpu!=2.15.*; platform_system=='Windows'", + # https://github.com/h5py/h5py/issues/2408 + "h5py>=3.6.0,!=3.11.0; platform_system=='Linux' and platform_machine=='aarch64'", *extra_requires, ], "gpu": [ "tensorflow", "tensorflow-metal; platform_machine=='arm64' and platform_system == 'Darwin'", + # See above. + "h5py>=3.6.0,!=3.11.0; platform_system=='Linux' and platform_machine=='aarch64'", *extra_requires, ], **extra_select, @@ -189,8 +198,8 @@ def get_tf_requirement(tf_version: str = "") -> dict: } -@lru_cache() -def get_tf_version(tf_path: Union[str, Path]) -> str: +@lru_cache +def get_tf_version(tf_path: Optional[Union[str, Path]]) -> str: """Get TF version from a TF Python library path. 
Parameters diff --git a/backend/read_env.py b/backend/read_env.py index 079211d4d7..df358fc83a 100644 --- a/backend/read_env.py +++ b/backend/read_env.py @@ -5,22 +5,23 @@ from functools import ( lru_cache, ) -from typing import ( - Tuple, -) from packaging.version import ( Version, ) +from .find_pytorch import ( + find_pytorch, + get_pt_version, +) from .find_tensorflow import ( find_tensorflow, get_tf_version, ) -@lru_cache() -def get_argument_from_env() -> Tuple[str, list, list, dict, str]: +@lru_cache +def get_argument_from_env() -> tuple[str, list, list, dict, str, str]: """Get the arguments from environment variables. The environment variables are assumed to be not changed during the build. @@ -37,10 +38,12 @@ def get_argument_from_env() -> Tuple[str, list, list, dict, str]: The extra scripts to be installed. str The TensorFlow version. + str + The PyTorch version. """ cmake_args = [] extra_scripts = {} - # get variant option from the environment varibles, available: cpu, cuda, rocm + # get variant option from the environment variables, available: cpu, cuda, rocm dp_variant = os.environ.get("DP_VARIANT", "cpu").lower() if dp_variant == "cpu" or dp_variant == "": cmake_minimum_required_version = "3.16" @@ -54,13 +57,15 @@ def get_argument_from_env() -> Tuple[str, list, list, dict, str]: cmake_minimum_required_version = "3.21" cmake_args.append("-DUSE_ROCM_TOOLKIT:BOOL=TRUE") rocm_root = os.environ.get("ROCM_ROOT") + if not rocm_root: + rocm_root = os.environ.get("ROCM_PATH") if rocm_root: cmake_args.append(f"-DCMAKE_HIP_COMPILER_ROCM_ROOT:STRING={rocm_root}") hipcc_flags = os.environ.get("HIP_HIPCC_FLAGS") if hipcc_flags is not None: os.environ["HIPFLAGS"] = os.environ.get("HIPFLAGS", "") + " " + hipcc_flags else: - raise RuntimeError("Unsupported DP_VARIANT option: %s" % dp_variant) + raise RuntimeError(f"Unsupported DP_VARIANT option: {dp_variant}") if os.environ.get("DP_BUILD_TESTING", "0") == "1": cmake_args.append("-DBUILD_TESTING:BOOL=TRUE") @@ -78,18 
+83,41 @@ def get_argument_from_env() -> Tuple[str, list, list, dict, str]: cmake_args.append(f"-DLAMMPS_VERSION={dp_lammps_version}") if dp_ipi == "1": cmake_args.append("-DENABLE_IPI:BOOL=TRUE") - extra_scripts["dp_ipi"] = "deepmd.entrypoints.ipi:dp_ipi" + extra_scripts["dp_ipi"] = "deepmd.tf.entrypoints.ipi:dp_ipi" - tf_install_dir, _ = find_tensorflow() - tf_version = get_tf_version(tf_install_dir) - if tf_version == "" or Version(tf_version) >= Version("2.12"): + if os.environ.get("DP_ENABLE_TENSORFLOW", "1") == "1": + tf_install_dir, _ = find_tensorflow() + tf_version = get_tf_version(tf_install_dir) + if tf_version == "" or Version(tf_version) >= Version("2.12"): + find_libpython_requires = [] + else: + find_libpython_requires = ["find_libpython"] + cmake_args.extend( + [ + "-DENABLE_TENSORFLOW=ON", + f"-DTENSORFLOW_VERSION={tf_version}", + f"-DTENSORFLOW_ROOT:PATH={tf_install_dir}", + ] + ) + else: find_libpython_requires = [] + cmake_args.append("-DENABLE_TENSORFLOW=OFF") + tf_version = None + + if os.environ.get("DP_ENABLE_PYTORCH", "0") == "1": + pt_install_dir, _ = find_pytorch() + pt_version = get_pt_version(pt_install_dir) + cmake_args.extend( + [ + "-DENABLE_PYTORCH=ON", + f"-DCMAKE_PREFIX_PATH={pt_install_dir}", + ] + ) else: - find_libpython_requires = ["find_libpython"] - cmake_args.append(f"-DTENSORFLOW_VERSION={tf_version}") + cmake_args.append("-DENABLE_PYTORCH=OFF") + pt_version = None cmake_args = [ - f"-DTENSORFLOW_ROOT:PATH={tf_install_dir}", "-DBUILD_PY_IF:BOOL=TRUE", *cmake_args, ] @@ -99,11 +127,12 @@ def get_argument_from_env() -> Tuple[str, list, list, dict, str]: find_libpython_requires, extra_scripts, tf_version, + pt_version, ) -def set_scikit_build_env(): +def set_scikit_build_env() -> None: """Set scikit-build environment variables before executing scikit-build.""" - cmake_minimum_required_version, cmake_args, _, _, _ = get_argument_from_env() + cmake_minimum_required_version, cmake_args, _, _, _, _ = get_argument_from_env() 
os.environ["SKBUILD_CMAKE_MINIMUM_VERSION"] = cmake_minimum_required_version os.environ["SKBUILD_CMAKE_ARGS"] = ";".join(cmake_args) diff --git a/codecov.yml b/codecov.yml index 3654859423..5b700bdddd 100644 --- a/codecov.yml +++ b/codecov.yml @@ -20,7 +20,6 @@ component_management: name: Python paths: - deepmd/** - - deepmd_utils/** - component_id: module_op name: OP paths: @@ -42,6 +41,10 @@ component_management: paths: - source/lmp/** - component_id: module_ipi - name: i-Pi + name: i-PI paths: - source/ipi/** +codecov: + notify: + # 12 Python + 2 C++ + after_n_builds: 14 diff --git a/data/json/json2yaml.py b/data/json/json2yaml.py index f4714908bb..8a07b4a6eb 100644 --- a/data/json/json2yaml.py +++ b/data/json/json2yaml.py @@ -13,7 +13,7 @@ import yaml -def _main(): +def _main() -> None: parser = argparse.ArgumentParser( description="convert json config file to yaml", formatter_class=argparse.ArgumentDefaultsHelpFormatter, diff --git a/data/raw/copy_raw.py b/data/raw/copy_raw.py index 642865db86..a0ea45277e 100755 --- a/data/raw/copy_raw.py +++ b/data/raw/copy_raw.py @@ -8,7 +8,7 @@ import numpy as np -def copy(in_dir, out_dir, ncopies=[1, 1, 1]): +def copy(in_dir, out_dir, ncopies=[1, 1, 1]) -> None: has_energy = os.path.isfile(in_dir + "/energy.raw") has_force = os.path.isfile(in_dir + "/force.raw") has_virial = os.path.isfile(in_dir + "/virial.raw") @@ -71,7 +71,7 @@ def copy(in_dir, out_dir, ncopies=[1, 1, 1]): np.savetxt(out_dir + "/ncopies.raw", ncopies, fmt="%d") -def _main(): +def _main() -> None: parser = argparse.ArgumentParser(description="parse copy raw args") parser.add_argument("INPUT", default=".", help="input dir of raw files") parser.add_argument("OUTPUT", default=".", help="output dir of copied raw files") @@ -85,7 +85,7 @@ def _main(): ) args = parser.parse_args() - print("# copy the system by %s copies" % args.ncopies) + print(f"# copy the system by {args.ncopies} copies") # noqa: T201 assert np.all( np.array(args.ncopies, dtype=int) >= 
np.array([1, 1, 1], dtype=int) ), "number of copies should be larger than or equal to 1" diff --git a/data/raw/shuffle_raw.py b/data/raw/shuffle_raw.py index 51bb7466c9..690307cc2c 100755 --- a/data/raw/shuffle_raw.py +++ b/data/raw/shuffle_raw.py @@ -30,14 +30,14 @@ def detect_raw(path): return raws -def _main(): +def _main() -> None: args = _parse_args() raws = args.raws inpath = args.INPUT outpath = args.OUTPUT if not os.path.isdir(inpath): - print("# no input dir " + inpath + ", exit") + print("# no input dir " + inpath + ", exit") # noqa: T201 return if not os.path.isdir(outpath): @@ -47,16 +47,16 @@ def _main(): raws = detect_raw(inpath) if len(raws) == 0: - print("# no file to shuffle, exit") + print("# no file to shuffle, exit") # noqa: T201 return assert "box.raw" in raws tmp = np.loadtxt(os.path.join(inpath, "box.raw")) tmp = np.reshape(tmp, [-1, 9]) nframe = tmp.shape[0] - print(nframe) + print(nframe) # noqa: T201 - print( + print( # noqa: T201 "# will shuffle raw files " + str(raws) + " in dir " diff --git a/deepmd/__init__.py b/deepmd/__init__.py index 0190bbc124..6f2b65ba63 100644 --- a/deepmd/__init__.py +++ b/deepmd/__init__.py @@ -1,61 +1,45 @@ # SPDX-License-Identifier: LGPL-3.0-or-later -"""Root of the deepmd package, exposes all public classes and submodules.""" +"""DeePMD-kit is a package written in Python/C++, designed to +minimize the effort required to build deep learning-based model +of interatomic potential energy and force field and to perform +molecular dynamics (MD). -try: - from importlib import ( - metadata, - ) -except ImportError: # for Python<3.8 - import importlib_metadata as metadata - -import deepmd.utils.network as network - -from . 
import ( - cluster, - descriptor, - fit, - loss, - nvnmd, - utils, -) -from .env import ( - set_mkl, -) -from .infer import ( - DeepEval, - DeepPotential, -) -from .infer.data_modifier import ( - DipoleChargeModifier, -) - -set_mkl() +The top module (deepmd.__init__) should not import any third-party +modules for performance. +""" try: - from deepmd_utils._version import version as __version__ + from deepmd._version import version as __version__ except ImportError: from .__about__ import ( __version__, ) -# load third-party plugins -try: - eps = metadata.entry_points(group="deepmd") -except TypeError: - eps = metadata.entry_points().get("deepmd", []) -for ep in eps: - ep.load() + +def DeepPotential(*args, **kwargs): + """Factory function that forwards to DeepEval (for compatibility + and performance). + + Parameters + ---------- + *args + positional arguments + **kwargs + keyword arguments + + Returns + ------- + DeepEval + potentials + """ + from deepmd.infer import ( + DeepPotential, + ) + + return DeepPotential(*args, **kwargs) + __all__ = [ "__version__", - "descriptor", - "fit", - "loss", - "utils", - "cluster", - "network", - "DeepEval", "DeepPotential", - "DipoleChargeModifier", - "nvnmd", ] diff --git a/deepmd/__main__.py b/deepmd/__main__.py index 6026b1c269..a31379b5e3 100644 --- a/deepmd/__main__.py +++ b/deepmd/__main__.py @@ -1,7 +1,7 @@ # SPDX-License-Identifier: LGPL-3.0-or-later """Package dp entry point.""" -from .entrypoints.main import ( +from deepmd.main import ( main, ) diff --git a/deepmd/backend/__init__.py b/deepmd/backend/__init__.py new file mode 100644 index 0000000000..fef9526294 --- /dev/null +++ b/deepmd/backend/__init__.py @@ -0,0 +1,27 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +"""Backends. + +Avoid directly importing third-party libraries in this module for performance. 
+""" + +# copy from dpdata +from importlib import ( + import_module, +) +from pathlib import ( + Path, +) + +from deepmd.utils.entry_point import ( + load_entry_point, +) + +PACKAGE_BASE = "deepmd.backend" +NOT_LOADABLE = ("__init__.py",) + +for module_file in Path(__file__).parent.glob("*.py"): + if module_file.name not in NOT_LOADABLE: + module_name = f".{module_file.stem}" + import_module(module_name, PACKAGE_BASE) + +load_entry_point("deepmd.backend") diff --git a/deepmd/backend/backend.py b/deepmd/backend/backend.py new file mode 100644 index 0000000000..3263169f6f --- /dev/null +++ b/deepmd/backend/backend.py @@ -0,0 +1,201 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from abc import ( + abstractmethod, +) +from enum import ( + Flag, + auto, +) +from typing import ( + TYPE_CHECKING, + Callable, + ClassVar, +) + +from deepmd.utils.plugin import ( + PluginVariant, + make_plugin_registry, +) + +if TYPE_CHECKING: + from argparse import ( + Namespace, + ) + + from deepmd.infer.deep_eval import ( + DeepEvalBackend, + ) + from deepmd.utils.neighbor_stat import ( + NeighborStat, + ) + + +class Backend(PluginVariant, make_plugin_registry("backend")): + r"""General backend class. + + Examples + -------- + >>> @Backend.register("tf") + >>> @Backend.register("tensorflow") + >>> class TensorFlowBackend(Backend): + ... pass + """ + + @staticmethod + def get_backend(key: str) -> type["Backend"]: + """Get the backend by key. + + Parameters + ---------- + key : str + the key of a backend + + Returns + ------- + Backend + the backend + """ + return Backend.get_class_by_type(key) + + @staticmethod + def get_backends() -> dict[str, type["Backend"]]: + """Get all the registered backend names. + + Returns + ------- + dict + all the registered backends + """ + return Backend.get_plugins() + + @staticmethod + def get_backends_by_feature( + feature: "Backend.Feature", + ) -> dict[str, type["Backend"]]: + """Get all the registered backend names with a specific feature. 
+ + Parameters + ---------- + feature : Backend.Feature + the feature flag + + Returns + ------- + dict + all the registered backends with the feature + """ + return { + key: backend + for key, backend in Backend.get_backends().items() + if backend.features & feature + } + + @staticmethod + def detect_backend_by_model(filename: str) -> type["Backend"]: + """Detect the backend of the given model file. + + Parameters + ---------- + filename : str + The model file name + """ + filename = str(filename).lower() + for backend in Backend.get_backends().values(): + for suffix in backend.suffixes: + if filename.endswith(suffix): + return backend + raise ValueError(f"Cannot detect the backend of the model file {filename}.") + + class Feature(Flag): + """Feature flag to indicate whether the backend supports certain features.""" + + ENTRY_POINT = auto() + """Support entry point hook.""" + DEEP_EVAL = auto() + """Support Deep Eval backend.""" + NEIGHBOR_STAT = auto() + """Support neighbor statistics.""" + IO = auto() + """Support IO hook.""" + + name: ClassVar[str] = "Unknown" + """The formal name of the backend. + + To be consistent, this name should be also registered in the plugin system.""" + + features: ClassVar[Feature] = Feature(0) + """The features of the backend.""" + suffixes: ClassVar[list[str]] = [] + """The supported suffixes of the saved model. + + The first element is considered as the default suffix.""" + + @abstractmethod + def is_available(self) -> bool: + """Check if the backend is available. + + Returns + ------- + bool + Whether the backend is available. + """ + + @property + @abstractmethod + def entry_point_hook(self) -> Callable[["Namespace"], None]: + """The entry point hook of the backend. + + Returns + ------- + Callable[[Namespace], None] + The entry point hook of the backend. + """ + pass + + @property + @abstractmethod + def deep_eval(self) -> type["DeepEvalBackend"]: + """The Deep Eval backend of the backend. 
+ + Returns + ------- + type[DeepEvalBackend] + The Deep Eval backend of the backend. + """ + pass + + @property + @abstractmethod + def neighbor_stat(self) -> type["NeighborStat"]: + """The neighbor statistics of the backend. + + Returns + ------- + type[NeighborStat] + The neighbor statistics of the backend. + """ + pass + + @property + @abstractmethod + def serialize_hook(self) -> Callable[[str], dict]: + """The serialize hook to convert the model file to a dictionary. + + Returns + ------- + Callable[[str], dict] + The serialize hook of the backend. + """ + pass + + @property + @abstractmethod + def deserialize_hook(self) -> Callable[[str, dict], None]: + """The deserialize hook to convert the dictionary to a model file. + + Returns + ------- + Callable[[str, dict], None] + The deserialize hook of the backend. + """ + pass diff --git a/deepmd/backend/dpmodel.py b/deepmd/backend/dpmodel.py new file mode 100644 index 0000000000..7c21b256ae --- /dev/null +++ b/deepmd/backend/dpmodel.py @@ -0,0 +1,120 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + TYPE_CHECKING, + Callable, + ClassVar, +) + +from deepmd.backend.backend import ( + Backend, +) + +if TYPE_CHECKING: + from argparse import ( + Namespace, + ) + + from deepmd.infer.deep_eval import ( + DeepEvalBackend, + ) + from deepmd.utils.neighbor_stat import ( + NeighborStat, + ) + + +@Backend.register("dp") +@Backend.register("dpmodel") +@Backend.register("np") +@Backend.register("numpy") +class DPModelBackend(Backend): + """DPModel backend that uses NumPy as the reference implementation.""" + + name = "DPModel" + """The formal name of the backend.""" + features: ClassVar[Backend.Feature] = ( + Backend.Feature.DEEP_EVAL | Backend.Feature.NEIGHBOR_STAT | Backend.Feature.IO + ) + """The features of the backend.""" + suffixes: ClassVar[list[str]] = [".dp", ".yaml", ".yml"] + """The suffixes of the backend.""" + + def is_available(self) -> bool: + """Check if the backend is available. 
+ + Returns + ------- + bool + Whether the backend is available. + """ + return True + + @property + def entry_point_hook(self) -> Callable[["Namespace"], None]: + """The entry point hook of the backend. + + Returns + ------- + Callable[[Namespace], None] + The entry point hook of the backend. + """ + raise NotImplementedError(f"Unsupported backend: {self.name}") + + @property + def deep_eval(self) -> type["DeepEvalBackend"]: + """The Deep Eval backend of the backend. + + Returns + ------- + type[DeepEvalBackend] + The Deep Eval backend of the backend. + """ + from deepmd.dpmodel.infer.deep_eval import ( + DeepEval, + ) + + return DeepEval + + @property + def neighbor_stat(self) -> type["NeighborStat"]: + """The neighbor statistics of the backend. + + Returns + ------- + type[NeighborStat] + The neighbor statistics of the backend. + """ + from deepmd.dpmodel.utils.neighbor_stat import ( + NeighborStat, + ) + + return NeighborStat + + @property + def serialize_hook(self) -> Callable[[str], dict]: + """The serialize hook to convert the model file to a dictionary. + + Returns + ------- + Callable[[str], dict] + The serialize hook of the backend. + """ + from deepmd.dpmodel.utils.serialization import ( + load_dp_model, + ) + + return load_dp_model + + @property + def deserialize_hook(self) -> Callable[[str, dict], None]: + """The deserialize hook to convert the dictionary to a model file. + + Returns + ------- + Callable[[str, dict], None] + The deserialize hook of the backend. 
+ """ + from deepmd.dpmodel.utils.serialization import ( + save_dp_model, + ) + + return save_dp_model diff --git a/deepmd/backend/jax.py b/deepmd/backend/jax.py new file mode 100644 index 0000000000..7a714c2090 --- /dev/null +++ b/deepmd/backend/jax.py @@ -0,0 +1,123 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from importlib.util import ( + find_spec, +) +from typing import ( + TYPE_CHECKING, + Callable, + ClassVar, +) + +from deepmd.backend.backend import ( + Backend, +) + +if TYPE_CHECKING: + from argparse import ( + Namespace, + ) + + from deepmd.infer.deep_eval import ( + DeepEvalBackend, + ) + from deepmd.utils.neighbor_stat import ( + NeighborStat, + ) + + +@Backend.register("jax") +class JAXBackend(Backend): + """JAX backend.""" + + name = "JAX" + """The formal name of the backend.""" + features: ClassVar[Backend.Feature] = ( + Backend.Feature.IO + | Backend.Feature.ENTRY_POINT + | Backend.Feature.DEEP_EVAL + | Backend.Feature.NEIGHBOR_STAT + ) + """The features of the backend.""" + suffixes: ClassVar[list[str]] = [".hlo", ".jax", ".savedmodel"] + """The suffixes of the backend.""" + + def is_available(self) -> bool: + """Check if the backend is available. + + Returns + ------- + bool + Whether the backend is available. + """ + return find_spec("jax") is not None + + @property + def entry_point_hook(self) -> Callable[["Namespace"], None]: + """The entry point hook of the backend. + + Returns + ------- + Callable[[Namespace], None] + The entry point hook of the backend. + """ + raise NotImplementedError + + @property + def deep_eval(self) -> type["DeepEvalBackend"]: + """The Deep Eval backend of the backend. + + Returns + ------- + type[DeepEvalBackend] + The Deep Eval backend of the backend. + """ + from deepmd.jax.infer.deep_eval import ( + DeepEval, + ) + + return DeepEval + + @property + def neighbor_stat(self) -> type["NeighborStat"]: + """The neighbor statistics of the backend. 
+ + Returns + ------- + type[NeighborStat] + The neighbor statistics of the backend. + """ + from deepmd.jax.utils.neighbor_stat import ( + NeighborStat, + ) + + return NeighborStat + + @property + def serialize_hook(self) -> Callable[[str], dict]: + """The serialize hook to convert the model file to a dictionary. + + Returns + ------- + Callable[[str], dict] + The serialize hook of the backend. + """ + from deepmd.jax.utils.serialization import ( + serialize_from_file, + ) + + return serialize_from_file + + @property + def deserialize_hook(self) -> Callable[[str, dict], None]: + """The deserialize hook to convert the dictionary to a model file. + + Returns + ------- + Callable[[str, dict], None] + The deserialize hook of the backend. + """ + from deepmd.jax.utils.serialization import ( + deserialize_to_file, + ) + + return deserialize_to_file diff --git a/deepmd/backend/pytorch.py b/deepmd/backend/pytorch.py new file mode 100644 index 0000000000..f5b0dd92b2 --- /dev/null +++ b/deepmd/backend/pytorch.py @@ -0,0 +1,124 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from importlib.util import ( + find_spec, +) +from typing import ( + TYPE_CHECKING, + Callable, + ClassVar, +) + +from deepmd.backend.backend import ( + Backend, +) + +if TYPE_CHECKING: + from argparse import ( + Namespace, + ) + + from deepmd.infer.deep_eval import ( + DeepEvalBackend, + ) + from deepmd.utils.neighbor_stat import ( + NeighborStat, + ) + + +@Backend.register("pt") +@Backend.register("pytorch") +class PyTorchBackend(Backend): + """PyTorch backend.""" + + name = "PyTorch" + """The formal name of the backend.""" + features: ClassVar[Backend.Feature] = ( + Backend.Feature.ENTRY_POINT + | Backend.Feature.DEEP_EVAL + | Backend.Feature.NEIGHBOR_STAT + | Backend.Feature.IO + ) + """The features of the backend.""" + suffixes: ClassVar[list[str]] = [".pth", ".pt"] + """The suffixes of the backend.""" + + def is_available(self) -> bool: + """Check if the backend is available. 
+ + Returns + ------- + bool + Whether the backend is available. + """ + return find_spec("torch") is not None + + @property + def entry_point_hook(self) -> Callable[["Namespace"], None]: + """The entry point hook of the backend. + + Returns + ------- + Callable[[Namespace], None] + The entry point hook of the backend. + """ + from deepmd.pt.entrypoints.main import main as deepmd_main + + return deepmd_main + + @property + def deep_eval(self) -> type["DeepEvalBackend"]: + """The Deep Eval backend of the backend. + + Returns + ------- + type[DeepEvalBackend] + The Deep Eval backend of the backend. + """ + from deepmd.pt.infer.deep_eval import DeepEval as DeepEvalPT + + return DeepEvalPT + + @property + def neighbor_stat(self) -> type["NeighborStat"]: + """The neighbor statistics of the backend. + + Returns + ------- + type[NeighborStat] + The neighbor statistics of the backend. + """ + from deepmd.pt.utils.neighbor_stat import ( + NeighborStat, + ) + + return NeighborStat + + @property + def serialize_hook(self) -> Callable[[str], dict]: + """The serialize hook to convert the model file to a dictionary. + + Returns + ------- + Callable[[str], dict] + The serialize hook of the backend. + """ + from deepmd.pt.utils.serialization import ( + serialize_from_file, + ) + + return serialize_from_file + + @property + def deserialize_hook(self) -> Callable[[str, dict], None]: + """The deserialize hook to convert the dictionary to a model file. + + Returns + ------- + Callable[[str, dict], None] + The deserialize hook of the backend. 
+ """ + from deepmd.pt.utils.serialization import ( + deserialize_to_file, + ) + + return deserialize_to_file diff --git a/deepmd/backend/suffix.py b/deepmd/backend/suffix.py new file mode 100644 index 0000000000..e77aecb5d9 --- /dev/null +++ b/deepmd/backend/suffix.py @@ -0,0 +1,75 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import functools +import operator +from pathlib import ( + Path, +) +from typing import ( + Optional, + Union, +) + +from deepmd.backend.backend import ( + Backend, +) + + +def format_model_suffix( + filename: str, + feature: Optional[Backend.Feature] = None, + preferred_backend: Optional[Union[str, type["Backend"]]] = None, + strict_prefer: Optional[bool] = None, +) -> str: + """Check and format the suffixes of a filename. + + When preferred_backend is not given, this method checks the suffix of the filename + is within the suffixes of the any backends (with the given feature) and doesn't do formatting. + When preferred_backend is given, strict_prefer must be given. + If strict_prefer is True and the suffix is not within the suffixes of the preferred backend, + or strict_prefer is False and the suffix is not within the suffixes of the any backend with the given feature, + the filename will be formatted with the preferred suffix of the preferred backend. + + Parameters + ---------- + filename : str + The filename to be formatted. + feature : Backend.Feature, optional + The feature of the backend, by default None + preferred_backend : str or type of Backend, optional + The preferred backend, by default None + strict_prefer : bool, optional + Whether to strictly prefer the preferred backend, by default None + + Returns + ------- + str + The formatted filename with the correct suffix. + + Raises + ------ + ValueError + When preferred_backend is not given and the filename is not supported by any backend. 
+ """ + if preferred_backend is not None and strict_prefer is None: + raise ValueError("strict_prefer must be given when preferred_backend is given.") + if isinstance(preferred_backend, str): + preferred_backend = Backend.get_backend(preferred_backend) + if preferred_backend is not None and strict_prefer: + all_backends = [preferred_backend] + elif feature is None: + all_backends = list(Backend.get_backends().values()) + else: + all_backends = list(Backend.get_backends_by_feature(feature).values()) + + all_suffixes = set( + functools.reduce( + operator.iconcat, [backend.suffixes for backend in all_backends], [] + ) + ) + pp = Path(filename) + current_suffix = pp.suffix + if current_suffix not in all_suffixes: + if preferred_backend is not None: + return str(pp) + preferred_backend.suffixes[0] + raise ValueError(f"Unsupported model file format: {filename}") + return filename diff --git a/deepmd/backend/tensorflow.py b/deepmd/backend/tensorflow.py new file mode 100644 index 0000000000..6b73d7c469 --- /dev/null +++ b/deepmd/backend/tensorflow.py @@ -0,0 +1,133 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from importlib.util import ( + find_spec, +) +from typing import ( + TYPE_CHECKING, + Callable, + ClassVar, +) + +from deepmd.backend.backend import ( + Backend, +) + +if TYPE_CHECKING: + from argparse import ( + Namespace, + ) + + from deepmd.infer.deep_eval import ( + DeepEvalBackend, + ) + from deepmd.utils.neighbor_stat import ( + NeighborStat, + ) + + +@Backend.register("tf") +@Backend.register("tensorflow") +class TensorFlowBackend(Backend): + """TensorFlow backend.""" + + name = "TensorFlow" + """The formal name of the backend.""" + features: ClassVar[Backend.Feature] = ( + Backend.Feature.ENTRY_POINT + | Backend.Feature.DEEP_EVAL + | Backend.Feature.NEIGHBOR_STAT + | Backend.Feature.IO + ) + """The features of the backend.""" + suffixes: ClassVar[list[str]] = [".pb"] + """The suffixes of the backend.""" + + def is_available(self) -> bool: + """Check if 
the backend is available. + + Returns + ------- + bool + Whether the backend is available. + """ + # deepmd.env imports expensive numpy + # avoid import outside the method + from deepmd.env import ( + GLOBAL_CONFIG, + ) + + return ( + find_spec("tensorflow") is not None + and GLOBAL_CONFIG["enable_tensorflow"] != "0" + ) + + @property + def entry_point_hook(self) -> Callable[["Namespace"], None]: + """The entry point hook of the backend. + + Returns + ------- + Callable[[Namespace], None] + The entry point hook of the backend. + """ + from deepmd.tf.entrypoints.main import main as deepmd_main + + return deepmd_main + + @property + def deep_eval(self) -> type["DeepEvalBackend"]: + """The Deep Eval backend of the backend. + + Returns + ------- + type[DeepEvalBackend] + The Deep Eval backend of the backend. + """ + from deepmd.tf.infer.deep_eval import DeepEval as DeepEvalTF + + return DeepEvalTF + + @property + def neighbor_stat(self) -> type["NeighborStat"]: + """The neighbor statistics of the backend. + + Returns + ------- + type[NeighborStat] + The neighbor statistics of the backend. + """ + from deepmd.tf.utils.neighbor_stat import ( + NeighborStat, + ) + + return NeighborStat + + @property + def serialize_hook(self) -> Callable[[str], dict]: + """The serialize hook to convert the model file to a dictionary. + + Returns + ------- + Callable[[str], dict] + The serialize hook of the backend. + """ + from deepmd.tf.utils.serialization import ( + serialize_from_file, + ) + + return serialize_from_file + + @property + def deserialize_hook(self) -> Callable[[str, dict], None]: + """The deserialize hook to convert the dictionary to a model file. + + Returns + ------- + Callable[[str, dict], None] + The deserialize hook of the backend. 
+ """ + from deepmd.tf.utils.serialization import ( + deserialize_to_file, + ) + + return deserialize_to_file diff --git a/deepmd/calculator.py b/deepmd/calculator.py index b9c0a81006..c5f742bbec 100644 --- a/deepmd/calculator.py +++ b/deepmd/calculator.py @@ -7,8 +7,6 @@ from typing import ( TYPE_CHECKING, ClassVar, - Dict, - List, Optional, Union, ) @@ -19,8 +17,8 @@ all_changes, ) -from deepmd import ( - DeepPotential, +from deepmd.infer import ( + DeepPot, ) if TYPE_CHECKING: @@ -34,7 +32,7 @@ class DP(Calculator): """Implementation of ASE deepmd calculator. - Implemented propertie are `energy`, `forces` and `stress` + Implemented properties are `energy`, `forces` and `stress` Parameters ---------- @@ -42,18 +40,20 @@ class DP(Calculator): path to the model label : str, optional calculator label, by default "DP" - type_dict : Dict[str, int], optional + type_dict : dict[str, int], optional mapping of element types and their numbers, best left None and the calculator will infer this information from model, by default None neighbor_list : ase.neighborlist.NeighborList, optional The neighbor list object. If None, then build the native neighbor list. 
+ head : Union[str, None], optional + a specific model branch choosing from pretrained model, by default None Examples -------- Compute potential energy >>> from ase import Atoms - >>> from deepmd.calculator import DP + >>> from deepmd.tf.calculator import DP >>> water = Atoms('H2O', >>> positions=[(0.7601, 1.9270, 1), >>> (1.9575, 1, 1), @@ -72,7 +72,7 @@ class DP(Calculator): """ name = "DP" - implemented_properties: ClassVar[List[str]] = [ + implemented_properties: ClassVar[list[str]] = [ "energy", "free_energy", "forces", @@ -84,12 +84,17 @@ def __init__( self, model: Union[str, "Path"], label: str = "DP", - type_dict: Optional[Dict[str, int]] = None, + type_dict: Optional[dict[str, int]] = None, neighbor_list=None, + head=None, **kwargs, ) -> None: Calculator.__init__(self, label=label, **kwargs) - self.dp = DeepPotential(str(Path(model).resolve()), neighbor_list=neighbor_list) + self.dp = DeepPot( + str(Path(model).resolve()), + neighbor_list=neighbor_list, + head=head, + ) if type_dict: self.type_dict = type_dict else: @@ -100,19 +105,19 @@ def __init__( def calculate( self, atoms: Optional["Atoms"] = None, - properties: List[str] = ["energy", "forces", "virial"], - system_changes: List[str] = all_changes, - ): + properties: list[str] = ["energy", "forces", "virial"], + system_changes: list[str] = all_changes, + ) -> None: """Run calculation with deepmd model. 
Parameters ---------- atoms : Optional[Atoms], optional atoms object to run the calculation on, by default None - properties : List[str], optional + properties : list[str], optional unused, only for function signature compatibility, by default ["energy", "forces", "stress"] - system_changes : List[str], optional + system_changes : list[str], optional unused, only for function signature compatibility, by default all_changes """ if atoms is not None: diff --git a/deepmd/cluster/slurm.py b/deepmd/cluster/slurm.py deleted file mode 100644 index 5264622232..0000000000 --- a/deepmd/cluster/slurm.py +++ /dev/null @@ -1,59 +0,0 @@ -# SPDX-License-Identifier: LGPL-3.0-or-later -"""MOdule to get resources on SLURM cluster. - -References ----------- -https://github.com/deepsense-ai/tensorflow_on_slurm #### -""" - -import os -from typing import ( - List, - Optional, - Tuple, -) - -import hostlist - -from deepmd.cluster import ( - local, -) - -__all__ = ["get_resource"] - - -def get_resource() -> Tuple[str, List[str], Optional[List[int]]]: - """Get SLURM resources: nodename, nodelist, and gpus. - - Returns - ------- - Tuple[str, List[str], Optional[List[int]]] - nodename, nodelist, and gpus - - Raises - ------ - RuntimeError - if number of nodes could not be retrieved - ValueError - list of nodes is not of the same length sa number of nodes - ValueError - if current nodename is not found in node list - """ - nodelist = hostlist.expand_hostlist(os.environ["SLURM_JOB_NODELIST"]) - nodename = os.environ["SLURMD_NODENAME"] - num_nodes_env = os.getenv("SLURM_JOB_NUM_NODES") - if num_nodes_env: - num_nodes = int(num_nodes_env) - else: - raise RuntimeError("Could not get SLURM number of nodes") - - if len(nodelist) != num_nodes: - raise ValueError( - f"Number of slurm nodes {len(nodelist)} not equal to {num_nodes}" - ) - if nodename not in nodelist: - raise ValueError( - f"Nodename({nodename}) not in nodelist({nodelist}). This should not happen!" 
- ) - gpus = local.get_gpus() - return nodename, nodelist, gpus diff --git a/deepmd/common.py b/deepmd/common.py index 54e3d0a6f8..1e66113306 100644 --- a/deepmd/common.py +++ b/deepmd/common.py @@ -1,286 +1,294 @@ # SPDX-License-Identifier: LGPL-3.0-or-later -"""Collection of functions and classes used throughout the whole package.""" - +import glob +import json +import os +import platform +import shutil import warnings -from functools import ( - wraps, +from hashlib import ( + sha1, +) +from pathlib import ( + Path, ) from typing import ( TYPE_CHECKING, Any, - Callable, + TypeVar, Union, + get_args, ) -import tensorflow -from tensorflow.python.framework import ( - tensor_util, -) +try: + from typing import Literal # python >=3.8 +except ImportError: + from typing_extensions import Literal # type: ignore + +import numpy as np +import yaml from deepmd.env import ( - GLOBAL_TF_FLOAT_PRECISION, - op_module, - tf, + GLOBAL_NP_FLOAT_PRECISION, ) -from deepmd_utils.common import ( - add_data_requirement, - data_requirement, - expand_sys_str, - get_np_precision, - j_loader, - j_must_have, - make_default_mesh, - select_idx_map, +from deepmd.utils.path import ( + DPPath, ) -if TYPE_CHECKING: - from deepmd_utils.common import ( - _ACTIVATION, - _PRECISION, - ) - __all__ = [ - # from deepmd_utils.common - "data_requirement", - "add_data_requirement", "select_idx_map", "make_default_mesh", - "j_must_have", "j_loader", "expand_sys_str", "get_np_precision", - # from self - "PRECISION_DICT", + "VALID_PRECISION", + "VALID_ACTIVATION", +] + +_PRECISION = Literal["default", "float16", "float32", "float64"] +_ACTIVATION = Literal[ + "relu", + "relu6", + "softplus", + "sigmoid", + "tanh", "gelu", "gelu_tf", - "ACTIVATION_FN_DICT", - "get_activation_func", - "get_precision", - "safe_cast_tensor", - "cast_precision", - "clear_session", + "none", + "linear", ] +# get_args is new in py38 +VALID_PRECISION: set[_PRECISION] = set(get_args(_PRECISION)) +VALID_ACTIVATION: set[_ACTIVATION] = 
set(get_args(_ACTIVATION)) -# define constants -PRECISION_DICT = { - "default": GLOBAL_TF_FLOAT_PRECISION, - "float16": tf.float16, - "float32": tf.float32, - "float64": tf.float64, - "bfloat16": tf.bfloat16, -} - +if TYPE_CHECKING: + _DICT_VAL = TypeVar("_DICT_VAL") + __all__ += [ + "_DICT_VAL", + "_PRECISION", + "_ACTIVATION", + ] -def gelu(x: tf.Tensor) -> tf.Tensor: - """Gaussian Error Linear Unit. - This is a smoother version of the RELU, implemented by custom operator. +def select_idx_map(atom_types: np.ndarray, select_types: np.ndarray) -> np.ndarray: + """Build map of indices for element supplied element types from all atoms list. Parameters ---------- - x : tf.Tensor - float Tensor to perform activation + atom_types : np.ndarray + array specifying type for each atoms as integer + select_types : np.ndarray + types of atoms you want to find indices for Returns ------- - tf.Tensor - `x` with the GELU activation applied + np.ndarray + indices of types of atoms defined by `select_types` in `atom_types` array - References - ---------- - Original paper - https://arxiv.org/abs/1606.08415 + Warnings + -------- + `select_types` array will be sorted before finding indices in `atom_types` """ - return op_module.gelu_custom(x) + sort_select_types = np.sort(select_types) + idx_map = [] + for ii in sort_select_types: + idx_map.append(np.where(atom_types == ii)[0]) + return np.concatenate(idx_map) -def gelu_tf(x: tf.Tensor) -> tf.Tensor: - """Gaussian Error Linear Unit. +def make_default_mesh(pbc: bool, mixed_type: bool) -> np.ndarray: + """Make mesh. - This is a smoother version of the RELU, implemented by TF. 
+ Only the size of mesh matters, not the values: + * 6 for PBC, no mixed types + * 0 for no PBC, no mixed types + * 7 for PBC, mixed types + * 1 for no PBC, mixed types Parameters ---------- - x : tf.Tensor - float Tensor to perform activation + pbc : bool + if True, the mesh will be made for periodic boundary conditions + mixed_type : bool + if True, the mesh will be made for mixed types Returns ------- - tf.Tensor - `x` with the GELU activation applied - - References - ---------- - Original paper - https://arxiv.org/abs/1606.08415 + np.ndarray + mesh """ + mesh_size = int(pbc) * 6 + int(mixed_type) + default_mesh = np.zeros(mesh_size, dtype=np.int32) + return default_mesh - def gelu_wrapper(x): - try: - return tensorflow.nn.gelu(x, approximate=True) - except AttributeError: - warnings.warn( - "TensorFlow does not provide an implementation of gelu, please upgrade your TensorFlow version. Fallback to the custom gelu operator." - ) - return op_module.gelu_custom(x) - - return (lambda x: gelu_wrapper(x))(x) - - -ACTIVATION_FN_DICT = { - "relu": tf.nn.relu, - "relu6": tf.nn.relu6, - "softplus": tf.nn.softplus, - "sigmoid": tf.sigmoid, - "tanh": tf.nn.tanh, - "gelu": gelu, - "gelu_tf": gelu_tf, - "None": None, - "none": None, -} - - -def get_activation_func( - activation_fn: Union["_ACTIVATION", None], -) -> Union[Callable[[tf.Tensor], tf.Tensor], None]: - """Get activation function callable based on string name. + +def j_deprecated( + jdata: dict[str, "_DICT_VAL"], key: str, deprecated_key: list[str] = [] +) -> "_DICT_VAL": + """Assert that supplied dictionary contains specified key. 
Parameters ---------- - activation_fn : _ACTIVATION - one of the defined activation functions + jdata : dict[str, _DICT_VAL] + dictionary to check + key : str + key to check + deprecated_key : list[str], optional + list of deprecated keys, by default [] Returns ------- - Callable[[tf.Tensor], tf.Tensor] - correspondingg TF callable + _DICT_VAL + value that was stored under the supplied key Raises ------ RuntimeError - if unknown activation function is specified + if the key is not present """ - if activation_fn is None: - return None - if activation_fn not in ACTIVATION_FN_DICT: - raise RuntimeError(f"{activation_fn} is not a valid activation function") - return ACTIVATION_FN_DICT[activation_fn] + if key not in jdata.keys(): + for ii in deprecated_key: + if ii in jdata.keys(): + warnings.warn(f"the key {ii} is deprecated, please use {key} instead") + return jdata[ii] + else: + raise RuntimeError(f"json database must provide key {key}") + else: + return jdata[key] -def get_precision(precision: "_PRECISION") -> Any: - """Convert str to TF DType constant. +def j_loader(filename: Union[str, Path]) -> dict[str, Any]: + """Load yaml or json settings file. 
Parameters ---------- - precision : _PRECISION - one of the allowed precisions + filename : Union[str, Path] + path to file Returns ------- - tf.python.framework.dtypes.DType - appropriate TF constant + dict[str, Any] + loaded dictionary Raises ------ - RuntimeError - if supplied precision string does not have acorresponding TF constant + TypeError + if the supplied file is of unsupported type """ - if precision not in PRECISION_DICT: - raise RuntimeError(f"{precision} is not a valid precision") - return PRECISION_DICT[precision] - + filepath = Path(filename) + if filepath.suffix.endswith("json"): + with filepath.open() as fp: + return json.load(fp) + elif filepath.suffix.endswith(("yml", "yaml")): + with filepath.open() as fp: + return yaml.safe_load(fp) + else: + raise TypeError("config file must be json, or yaml/yml") -def safe_cast_tensor( - input: tf.Tensor, from_precision: tf.DType, to_precision: tf.DType -) -> tf.Tensor: - """Convert a Tensor from a precision to another precision. - If input is not a Tensor or without the specific precision, the method will not - cast it. +def expand_sys_str(root_dir: Union[str, Path]) -> list[str]: + """Recursively iterate over directories taking those that contain `type.raw` file. 
Parameters ---------- - input : tf.Tensor - input tensor - from_precision : tf.DType - Tensor data type that is casted from - to_precision : tf.DType - Tensor data type that casts to + root_dir : Union[str, Path] + starting directory Returns ------- - tf.Tensor - casted Tensor + list[str] + list of string pointing to system directories """ - if tensor_util.is_tensor(input) and input.dtype == from_precision: - return tf.cast(input, to_precision) - return input - + root_dir = DPPath(root_dir) + matches = [str(d) for d in root_dir.rglob("*") if (d / "type.raw").is_file()] + if (root_dir / "type.raw").is_file(): + matches.append(str(root_dir)) + return matches -def cast_precision(func: Callable) -> Callable: - """A decorator that casts and casts back the input - and output tensor of a method. - The decorator should be used in a classmethod. +def get_np_precision(precision: "_PRECISION") -> np.dtype: + """Get numpy precision constant from string. - The decorator will do the following thing: - (1) It casts input Tensors from `GLOBAL_TF_FLOAT_PRECISION` - to precision defined by property `precision`. - (2) It casts output Tensors from `precision` to - `GLOBAL_TF_FLOAT_PRECISION`. - (3) It checks inputs and outputs and only casts when - input or output is a Tensor and its dtype matches - `GLOBAL_TF_FLOAT_PRECISION` and `precision`, respectively. - If it does not match (e.g. it is an integer), the decorator - will do nothing on it. + Parameters + ---------- + precision : _PRECISION + string name of numpy constant or default Returns ------- - Callable - a decorator that casts and casts back the input and - output tensor of a method + np.dtype + numpy precision constant - Examples - -------- - >>> class A: - ... @property - ... def precision(self): - ... return tf.float32 - ... - ... @cast_precision - ... def f(x: tf.Tensor, y: tf.Tensor) -> tf.Tensor: - ... 
return x ** 2 + y + Raises + ------ + RuntimeError + if string is invalid """ + if precision == "default": + return GLOBAL_NP_FLOAT_PRECISION + elif precision == "float16": + return np.float16 + elif precision == "float32": + return np.float32 + elif precision == "float64": + return np.float64 + else: + raise RuntimeError(f"{precision} is not a valid precision") + - @wraps(func) - def wrapper(self, *args, **kwargs): - # only convert tensors - returned_tensor = func( - self, - *[ - safe_cast_tensor(vv, GLOBAL_TF_FLOAT_PRECISION, self.precision) - for vv in args - ], - **{ - kk: safe_cast_tensor(vv, GLOBAL_TF_FLOAT_PRECISION, self.precision) - for kk, vv in kwargs.items() - }, - ) - if isinstance(returned_tensor, tuple): - return tuple( - safe_cast_tensor(vv, self.precision, GLOBAL_TF_FLOAT_PRECISION) - for vv in returned_tensor - ) +def symlink_prefix_files(old_prefix: str, new_prefix: str) -> None: + """Create symlinks from old checkpoint prefix to new one. + + On Windows this function will copy files instead of creating symlinks. + + Parameters + ---------- + old_prefix : str + old checkpoint prefix, all files with this prefix will be symlinked + new_prefix : str + new checkpoint prefix + """ + original_files = glob.glob(old_prefix + ".*") + for ori_ff in original_files: + new_ff = new_prefix + ori_ff[len(old_prefix) :] + try: + # remove old one + os.remove(new_ff) + except OSError: + pass + if platform.system() != "Windows": + # by default one does not have access to create symlink on Windows + os.symlink(os.path.relpath(ori_ff, os.path.dirname(new_ff)), new_ff) else: - return safe_cast_tensor( - returned_tensor, self.precision, GLOBAL_TF_FLOAT_PRECISION - ) + shutil.copyfile(ori_ff, new_ff) - return wrapper +def get_hash(obj) -> str: + """Get hash of object. 
-def clear_session(): - """Reset all state generated by DeePMD-kit.""" - tf.reset_default_graph() - # TODO: remove this line when data_requirement is not a global variable - data_requirement.clear() + Parameters + ---------- + obj + object to hash + """ + return sha1(json.dumps(obj).encode("utf-8")).hexdigest() + + +def j_get_type(data: dict, class_name: str = "object") -> str: + """Get the type from the data. + + Parameters + ---------- + data : dict + the data + class_name : str, optional + the name of the class for error message, by default "object" + + Returns + ------- + str + the type + """ + try: + return data["type"] + except KeyError as e: + raise KeyError(f"the type of the {class_name} should be set by `type`") from e diff --git a/deepmd/descriptor/se.py b/deepmd/descriptor/se.py deleted file mode 100644 index 598f6f9ff8..0000000000 --- a/deepmd/descriptor/se.py +++ /dev/null @@ -1,162 +0,0 @@ -# SPDX-License-Identifier: LGPL-3.0-or-later -from typing import ( - Tuple, -) - -from deepmd.env import ( - tf, -) -from deepmd.utils.graph import ( - get_embedding_net_variables_from_graph_def, - get_tensor_by_name_from_graph, -) - -from .descriptor import ( - Descriptor, -) - - -class DescrptSe(Descriptor): - """A base class for smooth version of descriptors. - - Notes - ----- - All of these descriptors have an environmental matrix and an - embedding network (:meth:`deepmd.utils.network.embedding_net`), so - they can share some similiar methods without defining them twice. - - Attributes - ---------- - embedding_net_variables : dict - initial embedding network variables - descrpt_reshape : tf.Tensor - the reshaped descriptor - descrpt_deriv : tf.Tensor - the descriptor derivative - rij : tf.Tensor - distances between two atoms - nlist : tf.Tensor - the neighbor list - - """ - - def _identity_tensors(self, suffix: str = "") -> None: - """Identify tensors which are expected to be stored and restored. 
- - Notes - ----- - These tensors will be indentitied: - self.descrpt_reshape : o_rmat - self.descrpt_deriv : o_rmat_deriv - self.rij : o_rij - self.nlist : o_nlist - Thus, this method should be called during building the descriptor and - after these tensors are initialized. - - Parameters - ---------- - suffix : str - The suffix of the scope - """ - self.descrpt_reshape = tf.identity(self.descrpt_reshape, name="o_rmat" + suffix) - self.descrpt_deriv = tf.identity( - self.descrpt_deriv, name="o_rmat_deriv" + suffix - ) - self.rij = tf.identity(self.rij, name="o_rij" + suffix) - self.nlist = tf.identity(self.nlist, name="o_nlist" + suffix) - - def get_tensor_names(self, suffix: str = "") -> Tuple[str]: - """Get names of tensors. - - Parameters - ---------- - suffix : str - The suffix of the scope - - Returns - ------- - Tuple[str] - Names of tensors - """ - return ( - f"o_rmat{suffix}:0", - f"o_rmat_deriv{suffix}:0", - f"o_rij{suffix}:0", - f"o_nlist{suffix}:0", - ) - - def pass_tensors_from_frz_model( - self, - descrpt_reshape: tf.Tensor, - descrpt_deriv: tf.Tensor, - rij: tf.Tensor, - nlist: tf.Tensor, - ): - """Pass the descrpt_reshape tensor as well as descrpt_deriv tensor from the frz graph_def. - - Parameters - ---------- - descrpt_reshape - The passed descrpt_reshape tensor - descrpt_deriv - The passed descrpt_deriv tensor - rij - The passed rij tensor - nlist - The passed nlist tensor - """ - self.rij = rij - self.nlist = nlist - self.descrpt_deriv = descrpt_deriv - self.descrpt_reshape = descrpt_reshape - - def init_variables( - self, - graph: tf.Graph, - graph_def: tf.GraphDef, - suffix: str = "", - ) -> None: - """Init the embedding net variables with the given dict. 
- - Parameters - ---------- - graph : tf.Graph - The input frozen model graph - graph_def : tf.GraphDef - The input frozen model graph_def - suffix : str, optional - The suffix of the scope - """ - self.embedding_net_variables = get_embedding_net_variables_from_graph_def( - graph_def, suffix=suffix - ) - self.davg = get_tensor_by_name_from_graph( - graph, "descrpt_attr%s/t_avg" % suffix - ) - self.dstd = get_tensor_by_name_from_graph( - graph, "descrpt_attr%s/t_std" % suffix - ) - - @property - def precision(self) -> tf.DType: - """Precision of filter network.""" - return self.filter_precision - - @classmethod - def update_sel(cls, global_jdata: dict, local_jdata: dict): - """Update the selection and perform neighbor statistics. - - Parameters - ---------- - global_jdata : dict - The global data, containing the training section - local_jdata : dict - The local data refer to the current class - """ - from deepmd.entrypoints.train import ( - update_one_sel, - ) - - # default behavior is to update sel which is a list - local_jdata_cpy = local_jdata.copy() - return update_one_sel(global_jdata, local_jdata_cpy, False) diff --git a/deepmd/descriptor/se_atten.py b/deepmd/descriptor/se_atten.py deleted file mode 100644 index 1ceda23065..0000000000 --- a/deepmd/descriptor/se_atten.py +++ /dev/null @@ -1,1432 +0,0 @@ -# SPDX-License-Identifier: LGPL-3.0-or-later -import logging -import warnings -from typing import ( - List, - Optional, - Tuple, -) - -import numpy as np -from packaging.version import ( - Version, -) - -from deepmd.common import ( - cast_precision, - get_np_precision, -) -from deepmd.env import ( - GLOBAL_NP_FLOAT_PRECISION, - GLOBAL_TF_FLOAT_PRECISION, - TF_VERSION, - default_tf_session_config, - op_module, - tf, -) -from deepmd.nvnmd.descriptor.se_atten import ( - build_davg_dstd, - build_op_descriptor, - check_switch_range, - descrpt2r4, - filter_GR2D, - filter_lower_R42GR, -) -from deepmd.nvnmd.utils.config import ( - nvnmd_cfg, -) -from 
deepmd.utils.compress import ( - get_extra_side_embedding_net_variable, - get_two_side_type_embedding, - make_data, -) -from deepmd.utils.graph import ( - get_attention_layer_variables_from_graph_def, - get_extra_embedding_net_suffix, - get_extra_embedding_net_variables_from_graph_def, - get_pattern_nodes_from_graph_def, - get_tensor_by_name_from_graph, -) -from deepmd.utils.network import ( - embedding_net, - one_layer, -) -from deepmd.utils.sess import ( - run_sess, -) -from deepmd.utils.tabulate import ( - DPTabulate, -) - -from .descriptor import ( - Descriptor, -) -from .se_a import ( - DescrptSeA, -) - -log = logging.getLogger(__name__) - - -@Descriptor.register("se_atten") -class DescrptSeAtten(DescrptSeA): - r"""Smooth version descriptor with attention. - - Parameters - ---------- - rcut - The cut-off radius :math:`r_c` - rcut_smth - From where the environment matrix should be smoothed :math:`r_s` - sel : list[str] - sel[i] specifies the maxmum number of type i atoms in the cut-off radius - neuron : list[int] - Number of neurons in each hidden layers of the embedding net :math:`\mathcal{N}` - axis_neuron - Number of the axis neuron :math:`M_2` (number of columns of the sub-matrix of the embedding matrix) - resnet_dt - Time-step `dt` in the resnet construction: - y = x + dt * \phi (Wx + b) - trainable - If the weights of embedding net are trainable. - seed - Random seed for initializing the network parameters. - type_one_side - Try to build N_types embedding nets. Otherwise, building N_types^2 embedding nets - exclude_types : List[List[int]] - The excluded pairs of types which have no interaction with each other. - For example, `[[0, 1]]` means no interaction between type 0 and type 1. - set_davg_zero - Set the shift of embedding net input to zero. - activation_function - The activation function in the embedding net. Supported options are |ACTIVATION_FN| - precision - The precision of the embedding net parameters. 
Supported options are |PRECISION| - uniform_seed - Only for the purpose of backward compatibility, retrieves the old behavior of using the random seed - attn - The length of hidden vector during scale-dot attention computation. - attn_layer - The number of layers in attention mechanism. - attn_dotr - Whether to dot the relative coordinates on the attention weights as a gated scheme. - attn_mask - Whether to mask the diagonal in the attention weights. - multi_task - If the model has multi fitting nets to train. - stripped_type_embedding - Whether to strip the type embedding into a separated embedding network. - Default value will be True in `se_atten_v2` descriptor. - smooth_type_embdding - When using stripped type embedding, whether to dot smooth factor on the network output of type embedding - to keep the network smooth, instead of setting `set_davg_zero` to be True. - Default value will be True in `se_atten_v2` descriptor. - - Raises - ------ - ValueError - if ntypes is 0. - """ - - def __init__( - self, - rcut: float, - rcut_smth: float, - sel: int, - ntypes: int, - neuron: List[int] = [24, 48, 96], - axis_neuron: int = 8, - resnet_dt: bool = False, - trainable: bool = True, - seed: Optional[int] = None, - type_one_side: bool = True, - set_davg_zero: bool = True, - exclude_types: List[List[int]] = [], - activation_function: str = "tanh", - precision: str = "default", - uniform_seed: bool = False, - attn: int = 128, - attn_layer: int = 2, - attn_dotr: bool = True, - attn_mask: bool = False, - multi_task: bool = False, - stripped_type_embedding: bool = False, - smooth_type_embdding: bool = False, - **kwargs, - ) -> None: - if not set_davg_zero and not (stripped_type_embedding and smooth_type_embdding): - warnings.warn( - "Set 'set_davg_zero' False in descriptor 'se_atten' " - "may cause unexpected incontinuity during model inference!" 
- ) - DescrptSeA.__init__( - self, - rcut, - rcut_smth, - [sel], - neuron=neuron, - axis_neuron=axis_neuron, - resnet_dt=resnet_dt, - trainable=trainable, - seed=seed, - type_one_side=type_one_side, - exclude_types=exclude_types, - set_davg_zero=set_davg_zero, - activation_function=activation_function, - precision=precision, - uniform_seed=uniform_seed, - multi_task=multi_task, - ) - """ - Constructor - """ - if not (nvnmd_cfg.enable and (nvnmd_cfg.version == 1)): - assert Version(TF_VERSION) > Version( - "2" - ), "se_atten only support tensorflow version 2.0 or higher." - if ntypes == 0: - raise ValueError("`model/type_map` is not set or empty!") - self.stripped_type_embedding = stripped_type_embedding - self.smooth = smooth_type_embdding - self.ntypes = ntypes - self.att_n = attn - self.attn_layer = attn_layer - self.attn_mask = attn_mask - self.attn_dotr = attn_dotr - self.filter_np_precision = get_np_precision(precision) - self.two_side_embeeding_net_variables = None - self.layer_size = len(neuron) - - # descrpt config - self.sel_all_a = [sel] - self.sel_all_r = [0] - avg_zero = np.zeros([self.ntypes, self.ndescrpt]).astype( - GLOBAL_NP_FLOAT_PRECISION - ) - std_ones = np.ones([self.ntypes, self.ndescrpt]).astype( - GLOBAL_NP_FLOAT_PRECISION - ) - self.beta = np.zeros([self.attn_layer, self.filter_neuron[-1]]).astype( - GLOBAL_NP_FLOAT_PRECISION - ) - self.gamma = np.ones([self.attn_layer, self.filter_neuron[-1]]).astype( - GLOBAL_NP_FLOAT_PRECISION - ) - self.attention_layer_variables = None - sub_graph = tf.Graph() - with sub_graph.as_default(): - name_pfx = "d_sea_" - for ii in ["coord", "box"]: - self.place_holders[ii] = tf.placeholder( - GLOBAL_NP_FLOAT_PRECISION, [None, None], name=name_pfx + "t_" + ii - ) - self.place_holders["type"] = tf.placeholder( - tf.int32, [None, None], name=name_pfx + "t_type" - ) - self.place_holders["natoms_vec"] = tf.placeholder( - tf.int32, [self.ntypes + 2], name=name_pfx + "t_natoms" - ) - self.place_holders["default_mesh"] 
= tf.placeholder( - tf.int32, [None], name=name_pfx + "t_mesh" - ) - ( - self.stat_descrpt, - self.descrpt_deriv_t, - self.rij_t, - self.nlist_t, - self.nei_type_vec_t, - self.nmask_t, - ) = op_module.prod_env_mat_a_mix( - self.place_holders["coord"], - self.place_holders["type"], - self.place_holders["natoms_vec"], - self.place_holders["box"], - self.place_holders["default_mesh"], - tf.constant(avg_zero), - tf.constant(std_ones), - rcut_a=self.rcut_a, - rcut_r=self.rcut_r, - rcut_r_smth=self.rcut_r_smth, - sel_a=self.sel_all_a, - sel_r=self.sel_all_r, - ) - self.sub_sess = tf.Session(graph=sub_graph, config=default_tf_session_config) - - def compute_input_stats( - self, - data_coord: list, - data_box: list, - data_atype: list, - natoms_vec: list, - mesh: list, - input_dict: dict, - mixed_type: bool = False, - real_natoms_vec: Optional[list] = None, - **kwargs, - ) -> None: - """Compute the statisitcs (avg and std) of the training data. The input will be normalized by the statistics. - - Parameters - ---------- - data_coord - The coordinates. Can be generated by deepmd.model.make_stat_input - data_box - The box. Can be generated by deepmd.model.make_stat_input - data_atype - The atom types. Can be generated by deepmd.model.make_stat_input - natoms_vec - The vector for the number of atoms of the system and different types of atoms. - If mixed_type is True, this para is blank. See real_natoms_vec. - mesh - The mesh for neighbor searching. Can be generated by deepmd.model.make_stat_input - input_dict - Dictionary for additional input - mixed_type - Whether to perform the mixed_type mode. - If True, the input data has the mixed_type format (see doc/model/train_se_atten.md), - in which frames in a system may have different natoms_vec(s), with the same nloc. - real_natoms_vec - If mixed_type is True, it takes in the real natoms_vec for each frame. - **kwargs - Additional keyword arguments. 
- """ - if True: - sumr = [] - suma = [] - sumn = [] - sumr2 = [] - suma2 = [] - if mixed_type: - sys_num = 0 - for cc, bb, tt, nn, mm, r_n in zip( - data_coord, data_box, data_atype, natoms_vec, mesh, real_natoms_vec - ): - sysr, sysr2, sysa, sysa2, sysn = self._compute_dstats_sys_smth( - cc, bb, tt, nn, mm, mixed_type, r_n - ) - sys_num += 1 - sumr.append(sysr) - suma.append(sysa) - sumn.append(sysn) - sumr2.append(sysr2) - suma2.append(sysa2) - else: - for cc, bb, tt, nn, mm in zip( - data_coord, data_box, data_atype, natoms_vec, mesh - ): - sysr, sysr2, sysa, sysa2, sysn = self._compute_dstats_sys_smth( - cc, bb, tt, nn, mm - ) - sumr.append(sysr) - suma.append(sysa) - sumn.append(sysn) - sumr2.append(sysr2) - suma2.append(sysa2) - if not self.multi_task: - stat_dict = { - "sumr": sumr, - "suma": suma, - "sumn": sumn, - "sumr2": sumr2, - "suma2": suma2, - } - self.merge_input_stats(stat_dict) - else: - self.stat_dict["sumr"] += sumr - self.stat_dict["suma"] += suma - self.stat_dict["sumn"] += sumn - self.stat_dict["sumr2"] += sumr2 - self.stat_dict["suma2"] += suma2 - - def enable_compression( - self, - min_nbor_dist: float, - graph: tf.Graph, - graph_def: tf.GraphDef, - table_extrapolate: float = 5, - table_stride_1: float = 0.01, - table_stride_2: float = 0.1, - check_frequency: int = -1, - suffix: str = "", - ) -> None: - """Reveive the statisitcs (distance, max_nbor_size and env_mat_range) of the training data. 
- - Parameters - ---------- - min_nbor_dist - The nearest distance between atoms - graph : tf.Graph - The graph of the model - graph_def : tf.GraphDef - The graph_def of the model - table_extrapolate - The scale of model extrapolation - table_stride_1 - The uniform stride of the first table - table_stride_2 - The uniform stride of the second table - check_frequency - The overflow check frequency - suffix : str, optional - The suffix of the scope - """ - # do some checks before the mocel compression process - assert ( - not self.filter_resnet_dt - ), "Model compression error: descriptor resnet_dt must be false!" - for tt in self.exclude_types: - if (tt[0] not in range(self.ntypes)) or (tt[1] not in range(self.ntypes)): - raise RuntimeError( - "exclude types" - + str(tt) - + " must within the number of atomic types " - + str(self.ntypes) - + "!" - ) - if self.ntypes * self.ntypes - len(self.exclude_types) == 0: - raise RuntimeError( - "empty embedding-net are not supported in model compression!" - ) - - if self.attn_layer != 0: - raise RuntimeError("can not compress model when attention layer is not 0.") - - ret = get_pattern_nodes_from_graph_def( - graph_def, - f"filter_type_all{suffix}/.+{get_extra_embedding_net_suffix(type_one_side=False)}", - ) - if len(ret) == 0: - raise RuntimeError( - f"can not find variables of embedding net `*{get_extra_embedding_net_suffix(type_one_side=False)}` from graph_def, maybe it is not a compressible model." 
- ) - - self.compress = True - self.table = DPTabulate( - self, - self.filter_neuron, - graph, - graph_def, - True, - self.exclude_types, - self.compress_activation_fn, - suffix=suffix, - ) - self.table_config = [ - table_extrapolate, - table_stride_1, - table_stride_2, - check_frequency, - ] - self.lower, self.upper = self.table.build( - min_nbor_dist, table_extrapolate, table_stride_1, table_stride_2 - ) - - self.final_type_embedding = get_two_side_type_embedding(self, graph) - type_side_suffix = get_extra_embedding_net_suffix(type_one_side=False) - self.matrix = get_extra_side_embedding_net_variable( - self, graph_def, type_side_suffix, "matrix", suffix - ) - self.bias = get_extra_side_embedding_net_variable( - self, graph_def, type_side_suffix, "bias", suffix - ) - self.two_embd = make_data(self, self.final_type_embedding) - - self.davg = get_tensor_by_name_from_graph( - graph, "descrpt_attr%s/t_avg" % suffix - ) - self.dstd = get_tensor_by_name_from_graph( - graph, "descrpt_attr%s/t_std" % suffix - ) - - def build( - self, - coord_: tf.Tensor, - atype_: tf.Tensor, - natoms: tf.Tensor, - box_: tf.Tensor, - mesh: tf.Tensor, - input_dict: dict, - reuse: Optional[bool] = None, - suffix: str = "", - ) -> tf.Tensor: - """Build the computational graph for the descriptor. - - Parameters - ---------- - coord_ - The coordinate of atoms - atype_ - The type of atoms - natoms - The number of atoms. This tensor has the length of Ntypes + 2 - natoms[0]: number of local atoms - natoms[1]: total number of atoms held by this processor - natoms[i]: 2 <= i < Ntypes+2, number of type i atoms - box_ : tf.Tensor - The box of the system - mesh - For historical reasons, only the length of the Tensor matters. - if size of mesh == 6, pbc is assumed. - if size of mesh == 0, no-pbc is assumed. - input_dict - Dictionary for additional inputs - reuse - The weights in the networks should be reused when get the variable. 
- suffix - Name suffix to identify this descriptor - - Returns - ------- - descriptor - The output descriptor - """ - davg = self.davg - dstd = self.dstd - if nvnmd_cfg.enable: - nvnmd_cfg.set_ntype(self.ntypes) - if nvnmd_cfg.restore_descriptor: - davg, dstd = build_davg_dstd() - check_switch_range(davg, dstd) - with tf.variable_scope("descrpt_attr" + suffix, reuse=reuse): - if davg is None: - davg = np.zeros([self.ntypes, self.ndescrpt]) - if dstd is None: - dstd = np.ones([self.ntypes, self.ndescrpt]) - t_rcut = tf.constant( - np.max([self.rcut_r, self.rcut_a]), - name="rcut", - dtype=GLOBAL_TF_FLOAT_PRECISION, - ) - t_ntypes = tf.constant(self.ntypes, name="ntypes", dtype=tf.int32) - t_ndescrpt = tf.constant(self.ndescrpt, name="ndescrpt", dtype=tf.int32) - t_sel = tf.constant(self.sel_a, name="sel", dtype=tf.int32) - t_original_sel = tf.constant( - self.original_sel if self.original_sel is not None else self.sel_a, - name="original_sel", - dtype=tf.int32, - ) - self.t_avg = tf.get_variable( - "t_avg", - davg.shape, - dtype=GLOBAL_TF_FLOAT_PRECISION, - trainable=False, - initializer=tf.constant_initializer(davg), - ) - self.t_std = tf.get_variable( - "t_std", - dstd.shape, - dtype=GLOBAL_TF_FLOAT_PRECISION, - trainable=False, - initializer=tf.constant_initializer(dstd), - ) - - with tf.control_dependencies([t_sel, t_original_sel]): - coord = tf.reshape(coord_, [-1, natoms[1] * 3]) - box = tf.reshape(box_, [-1, 9]) - atype = tf.reshape(atype_, [-1, natoms[1]]) - self.attn_weight = [None for i in range(self.attn_layer)] - self.angular_weight = [None for i in range(self.attn_layer)] - self.attn_weight_final = [None for i in range(self.attn_layer)] - - op_descriptor = ( - build_op_descriptor() if nvnmd_cfg.enable else op_module.prod_env_mat_a_mix - ) - ( - self.descrpt, - self.descrpt_deriv, - self.rij, - self.nlist, - self.nei_type_vec, - self.nmask, - ) = op_descriptor( - coord, - atype, - natoms, - box, - mesh, - self.t_avg, - self.t_std, - rcut_a=self.rcut_a, - 
rcut_r=self.rcut_r, - rcut_r_smth=self.rcut_r_smth, - sel_a=self.sel_all_a, - sel_r=self.sel_all_r, - ) - - self.nei_type_vec = tf.reshape(self.nei_type_vec, [-1]) - self.nmask = tf.cast( - tf.reshape(self.nmask, [-1, 1, self.sel_all_a[0]]), - self.filter_precision, - ) - self.negative_mask = -(2 << 32) * (1.0 - self.nmask) - # hard coding the magnitude of attention weight shift - self.smth_attn_w_shift = 20.0 - # only used when tensorboard was set as true - tf.summary.histogram("descrpt", self.descrpt) - tf.summary.histogram("rij", self.rij) - tf.summary.histogram("nlist", self.nlist) - - self.descrpt_reshape = tf.reshape(self.descrpt, [-1, self.ndescrpt]) - # prevent lookup error; the actual atype already used for nlist - atype = tf.clip_by_value(atype, 0, self.ntypes - 1) - self.atype_nloc = tf.reshape( - tf.slice(atype, [0, 0], [-1, natoms[0]]), [-1] - ) ## lammps will have error without this - self._identity_tensors(suffix=suffix) - if self.smooth: - self.sliced_avg = tf.reshape( - tf.slice( - tf.reshape(self.t_avg, [self.ntypes, -1, 4]), [0, 0, 0], [-1, 1, 1] - ), - [self.ntypes, 1], - ) - self.sliced_std = tf.reshape( - tf.slice( - tf.reshape(self.t_std, [self.ntypes, -1, 4]), [0, 0, 0], [-1, 1, 1] - ), - [self.ntypes, 1], - ) - self.avg_looked_up = tf.reshape( - tf.nn.embedding_lookup(self.sliced_avg, self.atype_nloc), - [-1, natoms[0], 1], - ) - self.std_looked_up = tf.reshape( - tf.nn.embedding_lookup(self.sliced_std, self.atype_nloc), - [-1, natoms[0], 1], - ) - self.recovered_r = ( - tf.reshape( - tf.slice( - tf.reshape(self.descrpt_reshape, [-1, 4]), [0, 0], [-1, 1] - ), - [-1, natoms[0], self.sel_all_a[0]], - ) - * self.std_looked_up - + self.avg_looked_up - ) - uu = 1 - self.rcut_r_smth * self.recovered_r - self.recovered_switch = -uu * uu * uu + 1 - self.recovered_switch = tf.clip_by_value(self.recovered_switch, 0.0, 1.0) - self.recovered_switch = tf.cast( - self.recovered_switch, self.filter_precision - ) - - self.dout, self.qmat = 
self._pass_filter( - self.descrpt_reshape, - self.atype_nloc, - natoms, - input_dict, - suffix=suffix, - reuse=reuse, - trainable=self.trainable, - ) - - # only used when tensorboard was set as true - tf.summary.histogram("embedding_net_output", self.dout) - return self.dout - - def _pass_filter( - self, inputs, atype, natoms, input_dict, reuse=None, suffix="", trainable=True - ): - assert ( - input_dict is not None - and input_dict.get("type_embedding", None) is not None - ), "se_atten desctiptor must use type_embedding" - type_embedding = input_dict.get("type_embedding", None) - inputs = tf.reshape(inputs, [-1, natoms[0], self.ndescrpt]) - output = [] - output_qmat = [] - inputs_i = inputs - inputs_i = tf.reshape(inputs_i, [-1, self.ndescrpt]) - type_i = -1 - if len(self.exclude_types): - mask = self.build_type_exclude_mask( - self.exclude_types, - self.ntypes, - self.sel_a, - self.ndescrpt, - self.atype_nloc, # when nloc != nall, pass nloc to mask - tf.shape(inputs_i)[0], - self.nei_type_vec, # extra input for atten - ) - inputs_i *= mask - if nvnmd_cfg.enable and nvnmd_cfg.quantize_descriptor: - inputs_i = descrpt2r4(inputs_i, atype) - layer, qmat = self._filter( - inputs_i, - type_i, - natoms, - name="filter_type_all" + suffix, - suffix=suffix, - reuse=reuse, - trainable=trainable, - activation_fn=self.filter_activation_fn, - type_embedding=type_embedding, - atype=atype, - ) - layer = tf.reshape(layer, [tf.shape(inputs)[0], natoms[0], self.get_dim_out()]) - qmat = tf.reshape( - qmat, [tf.shape(inputs)[0], natoms[0], self.get_dim_rot_mat_1() * 3] - ) - output.append(layer) - output_qmat.append(qmat) - output = tf.concat(output, axis=1) - output_qmat = tf.concat(output_qmat, axis=1) - return output, output_qmat - - def _compute_dstats_sys_smth( - self, - data_coord, - data_box, - data_atype, - natoms_vec, - mesh, - mixed_type=False, - real_natoms_vec=None, - ): - dd_all, descrpt_deriv_t, rij_t, nlist_t, nei_type_vec_t, nmask_t = run_sess( - self.sub_sess, - [ - 
self.stat_descrpt, - self.descrpt_deriv_t, - self.rij_t, - self.nlist_t, - self.nei_type_vec_t, - self.nmask_t, - ], - feed_dict={ - self.place_holders["coord"]: data_coord, - self.place_holders["type"]: data_atype, - self.place_holders["natoms_vec"]: natoms_vec, - self.place_holders["box"]: data_box, - self.place_holders["default_mesh"]: mesh, - }, - ) - if mixed_type: - nframes = dd_all.shape[0] - sysr = [0.0 for i in range(self.ntypes)] - sysa = [0.0 for i in range(self.ntypes)] - sysn = [0 for i in range(self.ntypes)] - sysr2 = [0.0 for i in range(self.ntypes)] - sysa2 = [0.0 for i in range(self.ntypes)] - for ff in range(nframes): - natoms = real_natoms_vec[ff] - dd_ff = np.reshape(dd_all[ff], [-1, self.ndescrpt * natoms_vec[0]]) - start_index = 0 - for type_i in range(self.ntypes): - end_index = ( - start_index + self.ndescrpt * natoms[2 + type_i] - ) # center atom split - dd = dd_ff[:, start_index:end_index] - dd = np.reshape( - dd, [-1, self.ndescrpt] - ) # nframes * typen_atoms , nnei * 4 - start_index = end_index - # compute - dd = np.reshape(dd, [-1, 4]) # nframes * typen_atoms * nnei, 4 - ddr = dd[:, :1] - dda = dd[:, 1:] - sumr = np.sum(ddr) - suma = np.sum(dda) / 3.0 - sumn = dd.shape[0] - sumr2 = np.sum(np.multiply(ddr, ddr)) - suma2 = np.sum(np.multiply(dda, dda)) / 3.0 - sysr[type_i] += sumr - sysa[type_i] += suma - sysn[type_i] += sumn - sysr2[type_i] += sumr2 - sysa2[type_i] += suma2 - else: - natoms = natoms_vec - dd_all = np.reshape(dd_all, [-1, self.ndescrpt * natoms[0]]) - start_index = 0 - sysr = [] - sysa = [] - sysn = [] - sysr2 = [] - sysa2 = [] - for type_i in range(self.ntypes): - end_index = ( - start_index + self.ndescrpt * natoms[2 + type_i] - ) # center atom split - dd = dd_all[:, start_index:end_index] - dd = np.reshape( - dd, [-1, self.ndescrpt] - ) # nframes * typen_atoms , nnei * 4 - start_index = end_index - # compute - dd = np.reshape(dd, [-1, 4]) # nframes * typen_atoms * nnei, 4 - ddr = dd[:, :1] - dda = dd[:, 1:] - sumr = 
np.sum(ddr) - suma = np.sum(dda) / 3.0 - sumn = dd.shape[0] - sumr2 = np.sum(np.multiply(ddr, ddr)) - suma2 = np.sum(np.multiply(dda, dda)) / 3.0 - sysr.append(sumr) - sysa.append(suma) - sysn.append(sumn) - sysr2.append(sumr2) - sysa2.append(suma2) - return sysr, sysr2, sysa, sysa2, sysn - - def _lookup_type_embedding( - self, - xyz_scatter, - natype, - type_embedding, - ): - """Concatenate `type_embedding` of neighbors and `xyz_scatter`. - If not self.type_one_side, concatenate `type_embedding` of center atoms as well. - - Parameters - ---------- - xyz_scatter: - shape is [nframes*natoms[0]*self.nnei, 1] - natype: - neighbor atom type - type_embedding: - shape is [self.ntypes, Y] where Y=jdata['type_embedding']['neuron'][-1] - - Returns - ------- - embedding: - environment of each atom represented by embedding. - """ - te_out_dim = type_embedding.get_shape().as_list()[-1] - self.test_type_embedding = type_embedding - self.test_nei_embed = tf.nn.embedding_lookup( - type_embedding, self.nei_type_vec - ) # shape is [self.nnei, 1+te_out_dim] - # nei_embed = tf.tile(nei_embed, (nframes * natoms[0], 1)) # shape is [nframes*natoms[0]*self.nnei, te_out_dim] - nei_embed = tf.reshape(self.test_nei_embed, [-1, te_out_dim]) - self.embedding_input = tf.concat( - [xyz_scatter, nei_embed], 1 - ) # shape is [nframes*natoms[0]*self.nnei, 1+te_out_dim] - if not self.type_one_side: - self.atm_embed = tf.nn.embedding_lookup( - type_embedding, natype - ) # shape is [nframes*natoms[0], te_out_dim] - self.atm_embed = tf.tile( - self.atm_embed, [1, self.nnei] - ) # shape is [nframes*natoms[0], self.nnei*te_out_dim] - self.atm_embed = tf.reshape( - self.atm_embed, [-1, te_out_dim] - ) # shape is [nframes*natoms[0]*self.nnei, te_out_dim] - self.embedding_input_2 = tf.concat( - [self.embedding_input, self.atm_embed], 1 - ) # shape is [nframes*natoms[0]*self.nnei, 1+te_out_dim+te_out_dim] - return self.embedding_input_2 - return self.embedding_input - - def _feedforward(self, input_xyz, 
d_in, d_mid): - residual = input_xyz - input_xyz = tf.nn.relu( - one_layer( - input_xyz, - d_mid, - name="c_ffn1", - reuse=tf.AUTO_REUSE, - seed=self.seed, - activation_fn=None, - precision=self.filter_precision, - trainable=True, - uniform_seed=self.uniform_seed, - initial_variables=self.attention_layer_variables, - ) - ) - input_xyz = one_layer( - input_xyz, - d_in, - name="c_ffn2", - reuse=tf.AUTO_REUSE, - seed=self.seed, - activation_fn=None, - precision=self.filter_precision, - trainable=True, - uniform_seed=self.uniform_seed, - initial_variables=self.attention_layer_variables, - ) - input_xyz += residual - input_xyz = tf.keras.layers.LayerNormalization()(input_xyz) - return input_xyz - - def _scaled_dot_attn( - self, - Q, - K, - V, - temperature, - input_r, - dotr=False, - do_mask=False, - layer=0, - save_weights=True, - ): - attn = tf.matmul(Q / temperature, K, transpose_b=True) - if self.smooth: - # (nb x nloc) x nsel - nsel = self.sel_all_a[0] - attn = (attn + self.smth_attn_w_shift) * tf.reshape( - self.recovered_switch, [-1, 1, nsel] - ) * tf.reshape( - self.recovered_switch, [-1, nsel, 1] - ) - self.smth_attn_w_shift - else: - attn *= self.nmask - attn += self.negative_mask - attn = tf.nn.softmax(attn, axis=-1) - if self.smooth: - attn = ( - attn - * tf.reshape(self.recovered_switch, [-1, 1, nsel]) - * tf.reshape(self.recovered_switch, [-1, nsel, 1]) - ) - else: - attn *= tf.reshape(self.nmask, [-1, attn.shape[-1], 1]) - if save_weights: - self.attn_weight[layer] = attn[0] # atom 0 - if dotr: - angular_weight = tf.matmul(input_r, input_r, transpose_b=True) # normalized - attn *= angular_weight - if save_weights: - self.angular_weight[layer] = angular_weight[0] # atom 0 - self.attn_weight_final[layer] = attn[0] # atom 0 - if do_mask: - nei = int(attn.shape[-1]) - mask = tf.cast(tf.ones((nei, nei)) - tf.eye(nei), self.filter_precision) - attn *= mask - output = tf.matmul(attn, V) - return output - - def _attention_layers( - self, - input_xyz, - layer_num, 
- shape_i, - outputs_size, - input_r, - dotr=False, - do_mask=False, - trainable=True, - suffix="", - ): - sd_k = tf.sqrt(tf.cast(1.0, dtype=self.filter_precision)) - for i in range(layer_num): - name = f"attention_layer_{i}{suffix}" - with tf.variable_scope(name, reuse=tf.AUTO_REUSE): - # input_xyz_in = tf.nn.l2_normalize(input_xyz, -1) - Q_c = one_layer( - input_xyz, - self.att_n, - name="c_query", - scope=name + "/", - reuse=tf.AUTO_REUSE, - seed=self.seed, - activation_fn=None, - precision=self.filter_precision, - trainable=trainable, - uniform_seed=self.uniform_seed, - initial_variables=self.attention_layer_variables, - ) - K_c = one_layer( - input_xyz, - self.att_n, - name="c_key", - scope=name + "/", - reuse=tf.AUTO_REUSE, - seed=self.seed, - activation_fn=None, - precision=self.filter_precision, - trainable=trainable, - uniform_seed=self.uniform_seed, - initial_variables=self.attention_layer_variables, - ) - V_c = one_layer( - input_xyz, - self.att_n, - name="c_value", - scope=name + "/", - reuse=tf.AUTO_REUSE, - seed=self.seed, - activation_fn=None, - precision=self.filter_precision, - trainable=trainable, - uniform_seed=self.uniform_seed, - initial_variables=self.attention_layer_variables, - ) - # # natom x nei_type_i x out_size - # xyz_scatter = tf.reshape(xyz_scatter, (-1, shape_i[1] // 4, outputs_size[-1])) - # natom x nei_type_i x att_n - Q_c = tf.nn.l2_normalize( - tf.reshape(Q_c, (-1, shape_i[1] // 4, self.att_n)), -1 - ) - K_c = tf.nn.l2_normalize( - tf.reshape(K_c, (-1, shape_i[1] // 4, self.att_n)), -1 - ) - V_c = tf.nn.l2_normalize( - tf.reshape(V_c, (-1, shape_i[1] // 4, self.att_n)), -1 - ) - - input_att = self._scaled_dot_attn( - Q_c, K_c, V_c, sd_k, input_r, dotr=dotr, do_mask=do_mask, layer=i - ) - input_att = tf.reshape(input_att, (-1, self.att_n)) - - # (natom x nei_type_i) x out_size - input_xyz += one_layer( - input_att, - outputs_size[-1], - name="c_out", - scope=name + "/", - reuse=tf.AUTO_REUSE, - seed=self.seed, - 
activation_fn=None, - precision=self.filter_precision, - trainable=trainable, - uniform_seed=self.uniform_seed, - initial_variables=self.attention_layer_variables, - ) - input_xyz = tf.keras.layers.LayerNormalization( - beta_initializer=tf.constant_initializer(self.beta[i]), - gamma_initializer=tf.constant_initializer(self.gamma[i]), - )(input_xyz) - # input_xyz = self._feedforward(input_xyz, outputs_size[-1], self.att_n) - return input_xyz - - def _filter_lower( - self, - type_i, - type_input, - start_index, - incrs_index, - inputs, - type_embedding=None, - atype=None, - is_exclude=False, - activation_fn=None, - bavg=0.0, - stddev=1.0, - trainable=True, - suffix="", - name="filter_", - reuse=None, - ): - """Input env matrix, returns R.G.""" - outputs_size = [1, *self.filter_neuron] - # cut-out inputs - # with natom x (nei_type_i x 4) - inputs_i = tf.slice(inputs, [0, start_index * 4], [-1, incrs_index * 4]) - shape_i = inputs_i.get_shape().as_list() - natom = tf.shape(inputs_i)[0] - # with (natom x nei_type_i) x 4 - inputs_reshape = tf.reshape(inputs_i, [-1, 4]) - # with (natom x nei_type_i) x 1 - xyz_scatter = tf.reshape(tf.slice(inputs_reshape, [0, 0], [-1, 1]), [-1, 1]) - assert atype is not None, "atype must exist!!" 
- type_embedding = tf.cast(type_embedding, self.filter_precision) # ntypes * Y - # natom x 4 x outputs_size - if not is_exclude: - with tf.variable_scope(name, reuse=reuse): - # with (natom x nei_type_i) x out_size - if not self.stripped_type_embedding: - log.info("use the previous se_atten model") - xyz_scatter = self._lookup_type_embedding( - xyz_scatter, atype, type_embedding - ) - xyz_scatter = embedding_net( - xyz_scatter, - self.filter_neuron, - self.filter_precision, - activation_fn=activation_fn, - resnet_dt=self.filter_resnet_dt, - name_suffix="", - stddev=stddev, - bavg=bavg, - seed=self.seed, - trainable=trainable, - uniform_seed=self.uniform_seed, - initial_variables=self.embedding_net_variables, - mixed_prec=self.mixed_prec, - ) - else: - if self.attn_layer == 0: - log.info( - "use the compressible model with stripped type embedding" - ) - else: - log.info( - "use the non-compressible model with stripped type embedding" - ) - if nvnmd_cfg.enable: - if nvnmd_cfg.quantize_descriptor: - return filter_lower_R42GR( - inputs_i, - atype, - self.nei_type_vec, - ) - elif nvnmd_cfg.restore_descriptor: - self.embedding_net_variables = ( - nvnmd_cfg.get_dp_init_weights() - ) - self.two_side_embeeding_net_variables = ( - nvnmd_cfg.get_dp_init_weights() - ) - if not self.compress: - xyz_scatter = embedding_net( - xyz_scatter, - self.filter_neuron, - self.filter_precision, - activation_fn=activation_fn, - resnet_dt=self.filter_resnet_dt, - name_suffix="", - stddev=stddev, - bavg=bavg, - seed=self.seed, - trainable=trainable, - uniform_seed=self.uniform_seed, - initial_variables=self.embedding_net_variables, - mixed_prec=self.mixed_prec, - ) - else: - net = "filter_net" - info = [ - self.lower[net], - self.upper[net], - self.upper[net] * self.table_config[0], - self.table_config[1], - self.table_config[2], - self.table_config[3], - ] - - padding_ntypes = type_embedding.shape[ - 0 - ] # this must be self.ntypes + 1 - atype_expand = tf.reshape(atype, [-1, 1]) - idx_i = 
tf.tile(atype_expand * padding_ntypes, [1, self.nnei]) - idx_j = tf.reshape(self.nei_type_vec, [-1, self.nnei]) - idx = idx_i + idx_j - index_of_two_side = tf.reshape(idx, [-1]) - - if self.compress: - two_embd = tf.nn.embedding_lookup( - self.two_embd, index_of_two_side - ) - else: - type_embedding_nei = tf.tile( - tf.reshape(type_embedding, [1, padding_ntypes, -1]), - [padding_ntypes, 1, 1], - ) # (ntypes) * ntypes * Y - type_embedding_center = tf.tile( - tf.reshape(type_embedding, [padding_ntypes, 1, -1]), - [1, padding_ntypes, 1], - ) # ntypes * (ntypes) * Y - two_side_type_embedding = tf.concat( - [type_embedding_nei, type_embedding_center], -1 - ) # ntypes * ntypes * (Y+Y) - two_side_type_embedding = tf.reshape( - two_side_type_embedding, - [-1, two_side_type_embedding.shape[-1]], - ) - embedding_of_two_side_type_embedding = embedding_net( - two_side_type_embedding, - self.filter_neuron, - self.filter_precision, - activation_fn=activation_fn, - resnet_dt=self.filter_resnet_dt, - name_suffix=get_extra_embedding_net_suffix( - type_one_side=False - ), - stddev=stddev, - bavg=bavg, - seed=self.seed, - trainable=trainable, - uniform_seed=self.uniform_seed, - initial_variables=self.two_side_embeeding_net_variables, - mixed_prec=self.mixed_prec, - ) - two_embd = tf.nn.embedding_lookup( - embedding_of_two_side_type_embedding, index_of_two_side - ) - if self.smooth: - two_embd = two_embd * tf.reshape(self.recovered_switch, [-1, 1]) - if not self.compress: - xyz_scatter = xyz_scatter * two_embd + xyz_scatter - else: - return op_module.tabulate_fusion_se_atten( - tf.cast(self.table.data[net], self.filter_precision), - info, - xyz_scatter, - tf.reshape(inputs_i, [natom, shape_i[1] // 4, 4]), - two_embd, - last_layer_size=outputs_size[-1], - is_sorted=len(self.exclude_types) == 0, - ) - - if (not self.uniform_seed) and (self.seed is not None): - self.seed += self.seed_shift - input_r = tf.slice( - tf.reshape(inputs_i, (-1, shape_i[1] // 4, 4)), [0, 0, 1], [-1, -1, 3] - ) 
- input_r = tf.nn.l2_normalize(input_r, -1) - # natom x nei_type_i x out_size - xyz_scatter_att = tf.reshape( - self._attention_layers( - xyz_scatter, - self.attn_layer, - shape_i, - outputs_size, - input_r, - dotr=self.attn_dotr, - do_mask=self.attn_mask, - trainable=trainable, - suffix=suffix, - ), - (-1, shape_i[1] // 4, outputs_size[-1]), - ) - # xyz_scatter = tf.reshape(xyz_scatter, (-1, shape_i[1] // 4, outputs_size[-1])) - else: - raise RuntimeError("this should not be touched") - # When using tf.reshape(inputs_i, [-1, shape_i[1]//4, 4]) below - # [588 24] -> [588 6 4] correct - # but if sel is zero - # [588 0] -> [147 0 4] incorrect; the correct one is [588 0 4] - # So we need to explicitly assign the shape to tf.shape(inputs_i)[0] instead of -1 - return tf.matmul( - tf.reshape(inputs_i, [natom, shape_i[1] // 4, 4]), - xyz_scatter_att, - transpose_a=True, - ) - - @cast_precision - def _filter( - self, - inputs, - type_input, - natoms, - type_embedding=None, - atype=None, - activation_fn=tf.nn.tanh, - stddev=1.0, - bavg=0.0, - suffix="", - name="linear", - reuse=None, - trainable=True, - ): - nframes = tf.shape(tf.reshape(inputs, [-1, natoms[0], self.ndescrpt]))[0] - # natom x (nei x 4) - shape = inputs.get_shape().as_list() - outputs_size = [1, *self.filter_neuron] - outputs_size_2 = self.n_axis_neuron - - start_index = 0 - type_i = 0 - # natom x 4 x outputs_size - xyz_scatter_1 = self._filter_lower( - type_i, - type_input, - start_index, - np.cumsum(self.sel_a)[-1], - inputs, - type_embedding=type_embedding, - is_exclude=False, - activation_fn=activation_fn, - stddev=stddev, - bavg=bavg, - trainable=trainable, - suffix=suffix, - name=name, - reuse=reuse, - atype=atype, - ) - if nvnmd_cfg.enable: - return filter_GR2D(xyz_scatter_1) - # natom x nei x outputs_size - # xyz_scatter = tf.concat(xyz_scatter_total, axis=1) - # natom x nei x 4 - # inputs_reshape = tf.reshape(inputs, [-1, shape[1]//4, 4]) - # natom x 4 x outputs_size - # xyz_scatter_1 = 
tf.matmul(inputs_reshape, xyz_scatter, transpose_a = True) - if self.original_sel is None: - # shape[1] = nnei * 4 - nnei = shape[1] / 4 - else: - nnei = tf.cast( - tf.Variable( - np.sum(self.original_sel), - dtype=tf.int32, - trainable=False, - name="nnei", - ), - self.filter_precision, - ) - xyz_scatter_1 = xyz_scatter_1 / nnei - # natom x 4 x outputs_size_2 - xyz_scatter_2 = tf.slice(xyz_scatter_1, [0, 0, 0], [-1, -1, outputs_size_2]) - # # natom x 3 x outputs_size_2 - # qmat = tf.slice(xyz_scatter_2, [0,1,0], [-1, 3, -1]) - # natom x 3 x outputs_size_1 - qmat = tf.slice(xyz_scatter_1, [0, 1, 0], [-1, 3, -1]) - # natom x outputs_size_1 x 3 - qmat = tf.transpose(qmat, perm=[0, 2, 1]) - # natom x outputs_size x outputs_size_2 - result = tf.matmul(xyz_scatter_1, xyz_scatter_2, transpose_a=True) - # natom x (outputs_size x outputs_size_2) - result = tf.reshape(result, [-1, outputs_size_2 * outputs_size[-1]]) - - return result, qmat - - def init_variables( - self, - graph: tf.Graph, - graph_def: tf.GraphDef, - suffix: str = "", - ) -> None: - """Init the embedding net variables with the given dict. 
- - Parameters - ---------- - graph : tf.Graph - The input frozen model graph - graph_def : tf.GraphDef - The input frozen model graph_def - suffix : str, optional - The suffix of the scope - """ - super().init_variables(graph=graph, graph_def=graph_def, suffix=suffix) - - self.attention_layer_variables = get_attention_layer_variables_from_graph_def( - graph_def, suffix=suffix - ) - if self.attn_layer > 0: - self.beta[0] = self.attention_layer_variables[ - f"attention_layer_0{suffix}/layer_normalization/beta" - ] - self.gamma[0] = self.attention_layer_variables[ - f"attention_layer_0{suffix}/layer_normalization/gamma" - ] - for i in range(1, self.attn_layer): - self.beta[i] = self.attention_layer_variables[ - f"attention_layer_{i}{suffix}/layer_normalization_{i}/beta" - ] - self.gamma[i] = self.attention_layer_variables[ - f"attention_layer_{i}{suffix}/layer_normalization_{i}/gamma" - ] - - if self.stripped_type_embedding: - self.two_side_embeeding_net_variables = ( - get_extra_embedding_net_variables_from_graph_def( - graph_def, - suffix, - get_extra_embedding_net_suffix(type_one_side=False), - self.layer_size, - ) - ) - - def build_type_exclude_mask( - self, - exclude_types: List[Tuple[int, int]], - ntypes: int, - sel: List[int], - ndescrpt: int, - atype: tf.Tensor, - shape0: tf.Tensor, - nei_type_vec: tf.Tensor, - ) -> tf.Tensor: - r"""Build the type exclude mask for the attention descriptor. - - Notes - ----- - This method has the similiar way to build the type exclude mask as - :meth:`deepmd.descriptor.descriptor.Descriptor.build_type_exclude_mask`. - The mathmatical expression has been explained in that method. - The difference is that the attention descriptor has provided the type of - the neighbors (idx_j) that is not in order, so we use it from an extra - input. - - Parameters - ---------- - exclude_types : List[Tuple[int, int]] - The list of excluded types, e.g. [(0, 1), (1, 0)] means the interaction - between type 0 and type 1 is excluded. 
- ntypes : int - The number of types. - sel : List[int] - The list of the number of selected neighbors for each type. - ndescrpt : int - The number of descriptors for each atom. - atype : tf.Tensor - The type of atoms, with the size of shape0. - shape0 : tf.Tensor - The shape of the first dimension of the inputs, which is equal to - nsamples * natoms. - nei_type_vec : tf.Tensor - The type of neighbors, with the size of (shape0, nnei). - - Returns - ------- - tf.Tensor - The type exclude mask, with the shape of (shape0, ndescrpt), and the - precision of GLOBAL_TF_FLOAT_PRECISION. The mask has the value of 1 if the - interaction between two types is not excluded, and 0 otherwise. - - See Also - -------- - deepmd.descriptor.descriptor.Descriptor.build_type_exclude_mask - """ - # generate a mask - # op returns ntypes when the neighbor doesn't exist, so we need to add 1 - type_mask = np.array( - [ - [ - 1 if (tt_i, tt_j) not in exclude_types else 0 - for tt_i in range(ntypes + 1) - ] - for tt_j in range(ntypes) - ], - dtype=bool, - ) - type_mask = tf.convert_to_tensor(type_mask, dtype=GLOBAL_TF_FLOAT_PRECISION) - type_mask = tf.reshape(type_mask, [-1]) - - # (nsamples * natoms, 1) - atype_expand = tf.reshape(atype, [-1, 1]) - # (nsamples * natoms, ndescrpt) - idx_i = tf.tile(atype_expand * (ntypes + 1), (1, ndescrpt)) - # idx_j has been provided by atten op - # (nsamples * natoms, nnei, 1) - idx_j = tf.reshape(nei_type_vec, [shape0, sel[0], 1]) - # (nsamples * natoms, nnei, ndescrpt // nnei) - idx_j = tf.tile(idx_j, (1, 1, ndescrpt // sel[0])) - # (nsamples * natoms, ndescrpt) - idx_j = tf.reshape(idx_j, [shape0, ndescrpt]) - idx = idx_i + idx_j - idx = tf.reshape(idx, [-1]) - mask = tf.nn.embedding_lookup(type_mask, idx) - # same as inputs_i, (nsamples * natoms, ndescrpt) - mask = tf.reshape(mask, [-1, ndescrpt]) - return mask - - @property - def explicit_ntypes(self) -> bool: - """Explicit ntypes with type embedding.""" - return True - - @classmethod - def 
update_sel(cls, global_jdata: dict, local_jdata: dict): - """Update the selection and perform neighbor statistics. - - Parameters - ---------- - global_jdata : dict - The global data, containing the training section - local_jdata : dict - The local data refer to the current class - """ - from deepmd.entrypoints.train import ( - update_one_sel, - ) - - local_jdata_cpy = local_jdata.copy() - return update_one_sel(global_jdata, local_jdata_cpy, True) diff --git a/deepmd/dpmodel/__init__.py b/deepmd/dpmodel/__init__.py new file mode 100644 index 0000000000..111c2d6ced --- /dev/null +++ b/deepmd/dpmodel/__init__.py @@ -0,0 +1,41 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from deepmd.utils.entry_point import ( + load_entry_point, +) + +from .common import ( + DEFAULT_PRECISION, + PRECISION_DICT, + NativeOP, +) +from .model import ( + DPModelCommon, +) +from .output_def import ( + FittingOutputDef, + ModelOutputDef, + OutputVariableDef, + fitting_check_output, + get_deriv_name, + get_hessian_name, + get_reduce_name, + model_check_output, +) + +__all__ = [ + "DPModelCommon", + "PRECISION_DICT", + "DEFAULT_PRECISION", + "NativeOP", + "ModelOutputDef", + "FittingOutputDef", + "OutputVariableDef", + "model_check_output", + "fitting_check_output", + "get_reduce_name", + "get_deriv_name", + "get_hessian_name", +] + + +load_entry_point("deepmd.dpmodel") diff --git a/deepmd/dpmodel/array_api.py b/deepmd/dpmodel/array_api.py new file mode 100644 index 0000000000..e5c0557851 --- /dev/null +++ b/deepmd/dpmodel/array_api.py @@ -0,0 +1,75 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +"""Utilities for the array API.""" + +import array_api_compat +from packaging.version import ( + Version, +) + + +def support_array_api(version: str) -> callable: + """Mark a function as supporting the specific version of the array API. 
+ + Parameters + ---------- + version : str + The version of the array API + + Returns + ------- + callable + The decorated function + + Examples + -------- + >>> @support_array_api(version="2022.12") + ... def f(x): + ... pass + """ + + def set_version(func: callable) -> callable: + func.array_api_version = version + return func + + return set_version + + +# array api adds take_along_axis in https://github.com/data-apis/array-api/pull/816 +# but it hasn't been released yet +# below is a pure Python implementation of take_along_axis +# https://github.com/data-apis/array-api/issues/177#issuecomment-2093630595 +def xp_swapaxes(a, axis1, axis2): + xp = array_api_compat.array_namespace(a) + axes = list(range(a.ndim)) + axes[axis1], axes[axis2] = axes[axis2], axes[axis1] + a = xp.permute_dims(a, axes) + return a + + +def xp_take_along_axis(arr, indices, axis): + xp = array_api_compat.array_namespace(arr) + if Version(xp.__array_api_version__) >= Version("2024.12"): + # see: https://github.com/data-apis/array-api-strict/blob/d086c619a58f35c38240592ef994aa19ca7beebc/array_api_strict/_indexing_functions.py#L30-L39 + return xp.take_along_axis(arr, indices, axis=axis) + arr = xp_swapaxes(arr, axis, -1) + indices = xp_swapaxes(indices, axis, -1) + + m = arr.shape[-1] + n = indices.shape[-1] + + shape = list(arr.shape) + shape.pop(-1) + shape = [*shape, n] + + arr = xp.reshape(arr, (-1,)) + if n != 0: + indices = xp.reshape(indices, (-1, n)) + else: + indices = xp.reshape(indices, (0, 0)) + + offset = (xp.arange(indices.shape[0], dtype=indices.dtype) * m)[:, xp.newaxis] + indices = xp.reshape(offset + indices, (-1,)) + + out = xp.take(arr, indices) + out = xp.reshape(out, shape) + return xp_swapaxes(out, axis, -1) diff --git a/deepmd/dpmodel/atomic_model/__init__.py b/deepmd/dpmodel/atomic_model/__init__.py new file mode 100644 index 0000000000..4f4ef32e03 --- /dev/null +++ b/deepmd/dpmodel/atomic_model/__init__.py @@ -0,0 +1,57 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later 
+"""The atomic model provides the prediction of some property on each +atom. All the atomic models are not supposed to be directly accessed +by users, but it provides a convenient interface for the +implementation of models. + +Taking the energy models for example, the developeres only needs to +implement the atomic energy prediction via an atomic model, and the +model can be automatically made by the `deepmd.dpmodel.make_model` +method. The `DPModel` is made by +``` +DPModel = make_model(DPAtomicModel) +``` + +""" + +from .base_atomic_model import ( + BaseAtomicModel, +) +from .dipole_atomic_model import ( + DPDipoleAtomicModel, +) +from .dos_atomic_model import ( + DPDOSAtomicModel, +) +from .dp_atomic_model import ( + DPAtomicModel, +) +from .energy_atomic_model import ( + DPEnergyAtomicModel, +) +from .linear_atomic_model import ( + DPZBLLinearEnergyAtomicModel, + LinearEnergyAtomicModel, +) +from .make_base_atomic_model import ( + make_base_atomic_model, +) +from .pairtab_atomic_model import ( + PairTabAtomicModel, +) +from .polar_atomic_model import ( + DPPolarAtomicModel, +) + +__all__ = [ + "make_base_atomic_model", + "BaseAtomicModel", + "DPAtomicModel", + "DPEnergyAtomicModel", + "PairTabAtomicModel", + "LinearEnergyAtomicModel", + "DPZBLLinearEnergyAtomicModel", + "DPDOSAtomicModel", + "DPPolarAtomicModel", + "DPDipoleAtomicModel", +] diff --git a/deepmd/dpmodel/atomic_model/base_atomic_model.py b/deepmd/dpmodel/atomic_model/base_atomic_model.py new file mode 100644 index 0000000000..eb95886598 --- /dev/null +++ b/deepmd/dpmodel/atomic_model/base_atomic_model.py @@ -0,0 +1,321 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import math +from typing import ( + Optional, +) + +import array_api_compat +import numpy as np + +from deepmd.dpmodel.common import ( + NativeOP, + to_numpy_array, +) +from deepmd.dpmodel.output_def import ( + FittingOutputDef, + OutputVariableDef, +) +from deepmd.dpmodel.utils import ( + AtomExcludeMask, + PairExcludeMask, +) 
+from deepmd.env import ( + GLOBAL_NP_FLOAT_PRECISION, +) +from deepmd.utils.finetune import ( + get_index_between_two_maps, + map_atom_exclude_types, + map_pair_exclude_types, +) + +from .make_base_atomic_model import ( + make_base_atomic_model, +) + +BaseAtomicModel_ = make_base_atomic_model(np.ndarray) + + +class BaseAtomicModel(BaseAtomicModel_, NativeOP): + def __init__( + self, + type_map: list[str], + atom_exclude_types: list[int] = [], + pair_exclude_types: list[tuple[int, int]] = [], + rcond: Optional[float] = None, + preset_out_bias: Optional[dict[str, np.ndarray]] = None, + ) -> None: + super().__init__() + self.type_map = type_map + self.reinit_atom_exclude(atom_exclude_types) + self.reinit_pair_exclude(pair_exclude_types) + self.rcond = rcond + self.preset_out_bias = preset_out_bias + + def init_out_stat(self) -> None: + """Initialize the output bias.""" + ntypes = self.get_ntypes() + self.bias_keys: list[str] = list(self.fitting_output_def().keys()) + self.max_out_size = max( + [self.atomic_output_def()[kk].size for kk in self.bias_keys] + ) + self.n_out = len(self.bias_keys) + out_bias_data = np.zeros( + [self.n_out, ntypes, self.max_out_size], dtype=GLOBAL_NP_FLOAT_PRECISION + ) + out_std_data = np.ones( + [self.n_out, ntypes, self.max_out_size], dtype=GLOBAL_NP_FLOAT_PRECISION + ) + self.out_bias = out_bias_data + self.out_std = out_std_data + + def __setitem__(self, key, value) -> None: + if key in ["out_bias"]: + self.out_bias = value + elif key in ["out_std"]: + self.out_std = value + else: + raise KeyError(key) + + def __getitem__(self, key): + if key in ["out_bias"]: + return self.out_bias + elif key in ["out_std"]: + return self.out_std + else: + raise KeyError(key) + + def get_type_map(self) -> list[str]: + """Get the type map.""" + return self.type_map + + def reinit_atom_exclude( + self, + exclude_types: list[int] = [], + ) -> None: + self.atom_exclude_types = exclude_types + if exclude_types == []: + self.atom_excl = None + else: + 
self.atom_excl = AtomExcludeMask(self.get_ntypes(), self.atom_exclude_types) + + def reinit_pair_exclude( + self, + exclude_types: list[tuple[int, int]] = [], + ) -> None: + self.pair_exclude_types = exclude_types + if exclude_types == []: + self.pair_excl = None + else: + self.pair_excl = PairExcludeMask(self.get_ntypes(), self.pair_exclude_types) + + def atomic_output_def(self) -> FittingOutputDef: + old_def = self.fitting_output_def() + old_list = list(old_def.get_data().values()) + return FittingOutputDef( + old_list # noqa:RUF005 + + [ + OutputVariableDef( + name="mask", + shape=[1], + reducible=False, + r_differentiable=False, + c_differentiable=False, + ) + ] + ) + + def change_type_map( + self, type_map: list[str], model_with_new_type_stat=None + ) -> None: + """Change the type related params to new ones, according to `type_map` and the original one in the model. + If there are new types in `type_map`, statistics will be updated accordingly to `model_with_new_type_stat` for these new types. + """ + remap_index, has_new_type = get_index_between_two_maps(self.type_map, type_map) + self.type_map = type_map + self.reinit_atom_exclude( + map_atom_exclude_types(self.atom_exclude_types, remap_index) + ) + self.reinit_pair_exclude( + map_pair_exclude_types(self.pair_exclude_types, remap_index) + ) + self.out_bias = self.out_bias[:, remap_index, :] + self.out_std = self.out_std[:, remap_index, :] + + def forward_common_atomic( + self, + extended_coord: np.ndarray, + extended_atype: np.ndarray, + nlist: np.ndarray, + mapping: Optional[np.ndarray] = None, + fparam: Optional[np.ndarray] = None, + aparam: Optional[np.ndarray] = None, + ) -> dict[str, np.ndarray]: + """Common interface for atomic inference. + + This method accept extended coordinates, extended atom typs, neighbor list, + and predict the atomic contribution of the fit property. 
+ + Parameters + ---------- + extended_coord + extended coordinates, shape: nf x (nall x 3) + extended_atype + extended atom typs, shape: nf x nall + for a type < 0 indicating the atomic is virtual. + nlist + neighbor list, shape: nf x nloc x nsel + mapping + extended to local index mapping, shape: nf x nall + fparam + frame parameters, shape: nf x dim_fparam + aparam + atomic parameter, shape: nf x nloc x dim_aparam + + Returns + ------- + ret_dict + dict of output atomic properties. + should implement the definition of `fitting_output_def`. + ret_dict["mask"] of shape nf x nloc will be provided. + ret_dict["mask"][ff,ii] == 1 indicating the ii-th atom of the ff-th frame is real. + ret_dict["mask"][ff,ii] == 0 indicating the ii-th atom of the ff-th frame is virtual. + + """ + xp = array_api_compat.array_namespace(extended_coord, extended_atype, nlist) + _, nloc, _ = nlist.shape + atype = extended_atype[:, :nloc] + if self.pair_excl is not None: + pair_mask = self.pair_excl.build_type_exclude_mask(nlist, extended_atype) + # exclude neighbors in the nlist + nlist = xp.where(pair_mask == 1, nlist, -1) + + ext_atom_mask = self.make_atom_mask(extended_atype) + ret_dict = self.forward_atomic( + extended_coord, + xp.where(ext_atom_mask, extended_atype, 0), + nlist, + mapping=mapping, + fparam=fparam, + aparam=aparam, + ) + ret_dict = self.apply_out_stat(ret_dict, atype) + + # nf x nloc + atom_mask = ext_atom_mask[:, :nloc] + if self.atom_excl is not None: + atom_mask = xp.logical_and( + atom_mask, self.atom_excl.build_type_exclude_mask(atype) + ) + + for kk in ret_dict.keys(): + out_shape = ret_dict[kk].shape + out_shape2 = math.prod(out_shape[2:]) + tmp_arr = ret_dict[kk].reshape([out_shape[0], out_shape[1], out_shape2]) + tmp_arr = xp.where(atom_mask[:, :, None], tmp_arr, xp.zeros_like(tmp_arr)) + ret_dict[kk] = xp.reshape(tmp_arr, out_shape) + ret_dict["mask"] = xp.astype(atom_mask, xp.int32) + + return ret_dict + + def call( + self, + extended_coord: np.ndarray, + 
extended_atype: np.ndarray, + nlist: np.ndarray, + mapping: Optional[np.ndarray] = None, + fparam: Optional[np.ndarray] = None, + aparam: Optional[np.ndarray] = None, + ) -> dict[str, np.ndarray]: + return self.forward_common_atomic( + extended_coord, + extended_atype, + nlist, + mapping=mapping, + fparam=fparam, + aparam=aparam, + ) + + def serialize(self) -> dict: + return { + "type_map": self.type_map, + "atom_exclude_types": self.atom_exclude_types, + "pair_exclude_types": self.pair_exclude_types, + "rcond": self.rcond, + "preset_out_bias": self.preset_out_bias, + "@variables": { + "out_bias": to_numpy_array(self.out_bias), + "out_std": to_numpy_array(self.out_std), + }, + } + + @classmethod + def deserialize(cls, data: dict) -> "BaseAtomicModel": + # do not deep copy Descriptor and Fitting class + data = data.copy() + variables = data.pop("@variables") + obj = cls(**data) + for kk in variables.keys(): + obj[kk] = variables[kk] + return obj + + def apply_out_stat( + self, + ret: dict[str, np.ndarray], + atype: np.ndarray, + ): + """Apply the stat to each atomic output. + The developer may override the method to define how the bias is applied + to the atomic output of the model. + + Parameters + ---------- + ret + The returned dict by the forward_atomic method + atype + The atom types. 
nf x nloc + + """ + out_bias, out_std = self._fetch_out_stat(self.bias_keys) + for kk in self.bias_keys: + # nf x nloc x odims, out_bias: ntypes x odims + ret[kk] = ret[kk] + out_bias[kk][atype] + return ret + + def _varsize( + self, + shape: list[int], + ) -> int: + output_size = 1 + len_shape = len(shape) + for i in range(len_shape): + output_size *= shape[i] + return output_size + + def _get_bias_index( + self, + kk: str, + ) -> int: + res: list[int] = [] + for i, e in enumerate(self.bias_keys): + if e == kk: + res.append(i) + assert len(res) == 1 + return res[0] + + def _fetch_out_stat( + self, + keys: list[str], + ) -> tuple[dict[str, np.ndarray], dict[str, np.ndarray]]: + ret_bias = {} + ret_std = {} + ntypes = self.get_ntypes() + for kk in keys: + idx = self._get_bias_index(kk) + isize = self._varsize(self.atomic_output_def()[kk].shape) + ret_bias[kk] = self.out_bias[idx, :, :isize].reshape( + [ntypes] + list(self.atomic_output_def()[kk].shape) # noqa: RUF005 + ) + ret_std[kk] = self.out_std[idx, :, :isize].reshape( + [ntypes] + list(self.atomic_output_def()[kk].shape) # noqa: RUF005 + ) + return ret_bias, ret_std diff --git a/deepmd/dpmodel/atomic_model/dipole_atomic_model.py b/deepmd/dpmodel/atomic_model/dipole_atomic_model.py new file mode 100644 index 0000000000..00428f4e95 --- /dev/null +++ b/deepmd/dpmodel/atomic_model/dipole_atomic_model.py @@ -0,0 +1,27 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import numpy as np + +from deepmd.dpmodel.fitting.dipole_fitting import ( + DipoleFitting, +) + +from .dp_atomic_model import ( + DPAtomicModel, +) + + +class DPDipoleAtomicModel(DPAtomicModel): + def __init__(self, descriptor, fitting, type_map, **kwargs): + if not isinstance(fitting, DipoleFitting): + raise TypeError( + "fitting must be an instance of DipoleFitting for DPDipoleAtomicModel" + ) + super().__init__(descriptor, fitting, type_map, **kwargs) + + def apply_out_stat( + self, + ret: dict[str, np.ndarray], + atype: np.ndarray, + ): + # dipole 
class DPDOSAtomicModel(DPAtomicModel):
    """A ``DPAtomicModel`` that only accepts a ``DOSFittingNet`` as its fitting."""

    def __init__(self, descriptor, fitting, type_map, **kwargs):
        """Check the fitting type, then defer to ``DPAtomicModel.__init__``.

        Raises
        ------
        TypeError
            If ``fitting`` is not an instance of ``DOSFittingNet``.
        """
        if isinstance(fitting, DOSFittingNet):
            super().__init__(descriptor, fitting, type_map, **kwargs)
            return
        raise TypeError(
            "fitting must be an instance of DOSFittingNet for DPDOSAtomicModel"
        )
+ + """ + + def __init__( + self, + descriptor, + fitting, + type_map: list[str], + **kwargs, + ) -> None: + super().__init__(type_map, **kwargs) + self.type_map = type_map + self.descriptor = descriptor + self.fitting = fitting + self.type_map = type_map + super().init_out_stat() + + def fitting_output_def(self) -> FittingOutputDef: + """Get the output def of the fitting net.""" + return self.fitting.output_def() + + def get_rcut(self) -> float: + """Get the cut-off radius.""" + return self.descriptor.get_rcut() + + def get_sel(self) -> list[int]: + """Get the neighbor selection.""" + return self.descriptor.get_sel() + + def mixed_types(self) -> bool: + """If true, the model + 1. assumes total number of atoms aligned across frames; + 2. uses a neighbor list that does not distinguish different atomic types. + + If false, the model + 1. assumes total number of atoms of each atom type aligned across frames; + 2. uses a neighbor list that distinguishes different atomic types. + + """ + return self.descriptor.mixed_types() + + def has_message_passing(self) -> bool: + """Returns whether the atomic model has message passing.""" + return self.descriptor.has_message_passing() + + def need_sorted_nlist_for_lower(self) -> bool: + """Returns whether the atomic model needs sorted nlist when using `forward_lower`.""" + return self.descriptor.need_sorted_nlist_for_lower() + + def enable_compression( + self, + min_nbor_dist: float, + table_extrapolate: float = 5, + table_stride_1: float = 0.01, + table_stride_2: float = 0.1, + check_frequency: int = -1, + ) -> None: + """Call descriptor enable_compression(). 
+ + Parameters + ---------- + min_nbor_dist + The nearest distance between atoms + table_extrapolate + The scale of model extrapolation + table_stride_1 + The uniform stride of the first table + table_stride_2 + The uniform stride of the second table + check_frequency + The overflow check frequency + """ + self.descriptor.enable_compression( + min_nbor_dist, + table_extrapolate, + table_stride_1, + table_stride_2, + check_frequency, + ) + + def forward_atomic( + self, + extended_coord: np.ndarray, + extended_atype: np.ndarray, + nlist: np.ndarray, + mapping: Optional[np.ndarray] = None, + fparam: Optional[np.ndarray] = None, + aparam: Optional[np.ndarray] = None, + ) -> dict[str, np.ndarray]: + """Models' atomic predictions. + + Parameters + ---------- + extended_coord + coordinates in extended region + extended_atype + atomic type in extended region + nlist + neighbor list. nf x nloc x nsel + mapping + mapps the extended indices to local indices. nf x nall + fparam + frame parameter. nf x ndf + aparam + atomic parameter. nf x nloc x nda + + Returns + ------- + result_dict + the result dict, defined by the `FittingOutputDef`. + + """ + nframes, nloc, nnei = nlist.shape + atype = extended_atype[:, :nloc] + descriptor, rot_mat, g2, h2, sw = self.descriptor( + extended_coord, + extended_atype, + nlist, + mapping=mapping, + ) + ret = self.fitting( + descriptor, + atype, + gr=rot_mat, + g2=g2, + h2=h2, + fparam=fparam, + aparam=aparam, + ) + return ret + + def change_type_map( + self, type_map: list[str], model_with_new_type_stat=None + ) -> None: + """Change the type related params to new ones, according to `type_map` and the original one in the model. + If there are new types in `type_map`, statistics will be updated accordingly to `model_with_new_type_stat` for these new types. 
+ """ + super().change_type_map( + type_map=type_map, model_with_new_type_stat=model_with_new_type_stat + ) + self.type_map = type_map + self.descriptor.change_type_map( + type_map=type_map, + model_with_new_type_stat=model_with_new_type_stat.descriptor + if model_with_new_type_stat is not None + else None, + ) + self.fitting_net.change_type_map(type_map=type_map) + + def serialize(self) -> dict: + dd = super().serialize() + dd.update( + { + "@class": "Model", + "type": "standard", + "@version": 2, + "type_map": self.type_map, + "descriptor": self.descriptor.serialize(), + "fitting": self.fitting.serialize(), + } + ) + return dd + + # for subclass overridden + base_descriptor_cls = BaseDescriptor + """The base descriptor class.""" + base_fitting_cls = BaseFitting + """The base fitting class.""" + + @classmethod + def deserialize(cls, data) -> "DPAtomicModel": + data = data.copy() + check_version_compatibility(data.pop("@version", 1), 2, 2) + data.pop("@class") + data.pop("type") + descriptor_obj = cls.base_descriptor_cls.deserialize(data.pop("descriptor")) + fitting_obj = cls.base_fitting_cls.deserialize(data.pop("fitting")) + data["descriptor"] = descriptor_obj + data["fitting"] = fitting_obj + obj = super().deserialize(data) + return obj + + def get_dim_fparam(self) -> int: + """Get the number (dimension) of frame parameters of this atomic model.""" + return self.fitting.get_dim_fparam() + + def get_dim_aparam(self) -> int: + """Get the number (dimension) of atomic parameters of this atomic model.""" + return self.fitting.get_dim_aparam() + + def get_sel_type(self) -> list[int]: + """Get the selected atom types of this model. + + Only atoms with selected atom types have atomic contribution + to the result of the model. + If returning an empty list, all atom types are selected. + """ + return self.fitting.get_sel_type() + + def is_aparam_nall(self) -> bool: + """Check whether the shape of atomic parameters is (nframes, nall, ndim). 
class DPEnergyAtomicModel(DPAtomicModel):
    """A ``DPAtomicModel`` restricted to energy-type fittings."""

    def __init__(self, descriptor, fitting, type_map, **kwargs):
        """Check the fitting type, then defer to ``DPAtomicModel.__init__``.

        Raises
        ------
        TypeError
            If ``fitting`` is neither an ``EnergyFittingNet`` nor an ``InvarFitting``.
        """
        accepted_fittings = (EnergyFittingNet, InvarFitting)
        if not isinstance(fitting, accepted_fittings):
            raise TypeError(
                "fitting must be an instance of EnergyFittingNet or InvarFitting for DPEnergyAtomicModel"
            )
        super().__init__(descriptor, fitting, type_map, **kwargs)
+ + Parameters + ---------- + models : list[DPAtomicModel or PairTabAtomicModel] + A list of models to be combined. PairTabAtomicModel must be used together with a DPAtomicModel. + type_map : list[str] + Mapping atom type to the name (str) of the type. + For example `type_map[1]` gives the name of the type 1. + """ + + def __init__( + self, + models: list[BaseAtomicModel], + type_map: list[str], + **kwargs, + ) -> None: + super().__init__(type_map, **kwargs) + super().init_out_stat() + + # check all sub models are of mixed type. + model_mixed_type = [] + for m in models: + if not m.mixed_types(): + model_mixed_type.append(m) + if len(model_mixed_type) > 0: + raise ValueError( + f"LinearAtomicModel only supports AtomicModel of mixed type, the following models are not mixed type: {model_mixed_type}." + ) + + self.models = models + sub_model_type_maps = [md.get_type_map() for md in models] + err_msg = [] + mapping_list = [] + common_type_map = set(type_map) + self.type_map = type_map + for tpmp in sub_model_type_maps: + if not common_type_map.issubset(set(tpmp)): + err_msg.append( + f"type_map {tpmp} is not a subset of type_map {type_map}" + ) + mapping_list.append(self.remap_atype(tpmp, self.type_map)) + self.mapping_list = mapping_list + assert len(err_msg) == 0, "\n".join(err_msg) + self.mixed_types_list = [model.mixed_types() for model in self.models] + + def mixed_types(self) -> bool: + """If true, the model + 1. assumes total number of atoms aligned across frames; + 2. uses a neighbor list that does not distinguish different atomic types. + + If false, the model + 1. assumes total number of atoms of each atom type aligned across frames; + 2. uses a neighbor list that distinguishes different atomic types. 
+ + """ + return True + + def has_message_passing(self) -> bool: + """Returns whether the atomic model has message passing.""" + return any(model.has_message_passing() for model in self.models) + + def need_sorted_nlist_for_lower(self) -> bool: + """Returns whether the atomic model needs sorted nlist when using `forward_lower`.""" + return True + + def get_rcut(self) -> float: + """Get the cut-off radius.""" + return max(self.get_model_rcuts()) + + def get_type_map(self) -> list[str]: + """Get the type map.""" + return self.type_map + + def change_type_map( + self, type_map: list[str], model_with_new_type_stat=None + ) -> None: + """Change the type related params to new ones, according to `type_map` and the original one in the model. + If there are new types in `type_map`, statistics will be updated accordingly to `model_with_new_type_stat` for these new types. + """ + super().change_type_map( + type_map=type_map, model_with_new_type_stat=model_with_new_type_stat + ) + for ii, model in enumerate(self.models): + model.change_type_map( + type_map=type_map, + model_with_new_type_stat=model_with_new_type_stat.models[ii] + if model_with_new_type_stat is not None + else None, + ) + + def get_model_rcuts(self) -> list[float]: + """Get the cut-off radius for each individual models.""" + return [model.get_rcut() for model in self.models] + + def get_sel(self) -> list[int]: + return [max([model.get_nsel() for model in self.models])] + + def get_model_nsels(self) -> list[int]: + """Get the processed sels for each individual models. Not distinguishing types.""" + return [model.get_nsel() for model in self.models] + + def get_model_sels(self) -> list[Union[int, list[int]]]: + """Get the sels for each individual models.""" + return [model.get_sel() for model in self.models] + + def _sort_rcuts_sels(self) -> tuple[list[float], list[int]]: + # sort the pair of rcut and sels in ascending order, first based on sel, then on rcut. 
+ zipped = sorted( + zip(self.get_model_rcuts(), self.get_model_nsels()), + key=lambda x: (x[1], x[0]), + ) + return [p[0] for p in zipped], [p[1] for p in zipped] + + def enable_compression( + self, + min_nbor_dist: float, + table_extrapolate: float = 5, + table_stride_1: float = 0.01, + table_stride_2: float = 0.1, + check_frequency: int = -1, + ) -> None: + """Compress model. + + Parameters + ---------- + min_nbor_dist + The nearest distance between atoms + table_extrapolate + The scale of model extrapolation + table_stride_1 + The uniform stride of the first table + table_stride_2 + The uniform stride of the second table + check_frequency + The overflow check frequency + """ + for model in self.models: + model.enable_compression( + min_nbor_dist, + table_extrapolate, + table_stride_1, + table_stride_2, + check_frequency, + ) + + def forward_atomic( + self, + extended_coord, + extended_atype, + nlist, + mapping: Optional[np.ndarray] = None, + fparam: Optional[np.ndarray] = None, + aparam: Optional[np.ndarray] = None, + ) -> dict[str, np.ndarray]: + """Return atomic prediction. + + Parameters + ---------- + extended_coord + coordinates in extended region, (nframes, nall * 3) + extended_atype + atomic type in extended region, (nframes, nall) + nlist + neighbor list, (nframes, nloc, nsel). + mapping + mapps the extended indices to local indices. + fparam + frame parameter. (nframes, ndf) + aparam + atomic parameter. (nframes, nloc, nda) + + Returns + ------- + result_dict + the result dict, defined by the fitting net output def. 
+ """ + xp = array_api_compat.array_namespace(extended_coord, extended_atype, nlist) + nframes, nloc, nnei = nlist.shape + extended_coord = xp.reshape(extended_coord, (nframes, -1, 3)) + sorted_rcuts, sorted_sels = self._sort_rcuts_sels() + nlists = build_multiple_neighbor_list( + extended_coord, + nlist, + sorted_rcuts, + sorted_sels, + ) + raw_nlists = [ + nlists[get_multiple_nlist_key(rcut, sel)] + for rcut, sel in zip(self.get_model_rcuts(), self.get_model_nsels()) + ] + nlists_ = [ + nl if mt else nlist_distinguish_types(nl, extended_atype, sel) + for mt, nl, sel in zip( + self.mixed_types_list, raw_nlists, self.get_model_sels() + ) + ] + ener_list = [] + for i, model in enumerate(self.models): + type_map_model = self.mapping_list[i] + ener_list.append( + model.forward_atomic( + extended_coord, + type_map_model[extended_atype], + nlists_[i], + mapping, + fparam, + aparam, + )["energy"] + ) + weights = self._compute_weight(extended_coord, extended_atype, nlists_) + + fit_ret = { + "energy": xp.sum(xp.stack(ener_list) * xp.stack(weights), axis=0), + } # (nframes, nloc, 1) + return fit_ret + + @staticmethod + def remap_atype(ori_map: list[str], new_map: list[str]) -> np.ndarray: + """ + This method is used to map the atype from the common type_map to the original type_map of + indivial AtomicModels. + + Parameters + ---------- + ori_map : list[str] + The original type map of an AtomicModel. + new_map : list[str] + The common type map of the DPZBLLinearEnergyAtomicModel, created by the `get_type_map` method, + must be a subset of the ori_map. 
+ + Returns + ------- + np.ndarray + """ + type_2_idx = {atp: idx for idx, atp in enumerate(ori_map)} + # this maps the atype in the new map to the original map + mapping = np.array([type_2_idx[new_map[idx]] for idx in range(len(new_map))]) + return mapping + + def fitting_output_def(self) -> FittingOutputDef: + return FittingOutputDef( + [ + OutputVariableDef( + name="energy", + shape=[1], + reducible=True, + r_differentiable=True, + c_differentiable=True, + ) + ] + ) + + def serialize(self) -> dict: + dd = super().serialize() + dd.update( + { + "@class": "Model", + "@version": 2, + "type": "linear", + "models": [model.serialize() for model in self.models], + "type_map": self.type_map, + } + ) + return dd + + @classmethod + def deserialize(cls, data: dict) -> "LinearEnergyAtomicModel": + data = data.copy() + check_version_compatibility(data.pop("@version", 2), 2, 2) + data.pop("@class", None) + data.pop("type", None) + models = [ + BaseAtomicModel.get_class_by_type(model["type"]).deserialize(model) + for model in data["models"] + ] + data["models"] = models + return super().deserialize(data) + + def _compute_weight( + self, + extended_coord: np.ndarray, + extended_atype: np.ndarray, + nlists_: list[np.ndarray], + ) -> list[np.ndarray]: + """This should be a list of user defined weights that matches the number of models to be combined.""" + xp = array_api_compat.array_namespace(extended_coord, extended_atype, nlists_) + nmodels = len(self.models) + nframes, nloc, _ = nlists_[0].shape + # the dtype of weights is the interface data type. + return [ + xp.ones((nframes, nloc, 1), dtype=GLOBAL_NP_FLOAT_PRECISION) / nmodels + for _ in range(nmodels) + ] + + def get_dim_fparam(self) -> int: + """Get the number (dimension) of frame parameters of this atomic model.""" + # tricky... 
+ return max([model.get_dim_fparam() for model in self.models]) + + def get_dim_aparam(self) -> int: + """Get the number (dimension) of atomic parameters of this atomic model.""" + return max([model.get_dim_aparam() for model in self.models]) + + def get_sel_type(self) -> list[int]: + """Get the selected atom types of this model. + + Only atoms with selected atom types have atomic contribution + to the result of the model. + If returning an empty list, all atom types are selected. + """ + if any(model.get_sel_type() == [] for model in self.models): + return [] + # join all the selected types + return list(set().union(*[model.get_sel_type() for model in self.models])) + + def is_aparam_nall(self) -> bool: + """Check whether the shape of atomic parameters is (nframes, nall, ndim). + + If False, the shape is (nframes, nloc, ndim). + """ + return False + + +@BaseAtomicModel.register("zbl") +class DPZBLLinearEnergyAtomicModel(LinearEnergyAtomicModel): + """Model linearly combine a list of AtomicModels. + + Parameters + ---------- + dp_model + The DPAtomicModel being combined. + zbl_model + The PairTable model being combined. + sw_rmin + The lower boundary of the interpolation between short-range tabulated interaction and DP. + sw_rmax + The upper boundary of the interpolation between short-range tabulated interaction and DP. + type_map + Mapping atom type to the name (str) of the type. + For example `type_map[1]` gives the name of the type 1. + smin_alpha + The short-range tabulated interaction will be switched according to the distance of the nearest neighbor. + This distance is calculated by softmin. 
+ """ + + def __init__( + self, + dp_model: DPAtomicModel, + zbl_model: PairTabAtomicModel, + sw_rmin: float, + sw_rmax: float, + type_map: list[str], + smin_alpha: Optional[float] = 0.1, + **kwargs, + ) -> None: + models = [dp_model, zbl_model] + kwargs["models"] = models + kwargs["type_map"] = type_map + super().__init__(**kwargs) + + self.sw_rmin = sw_rmin + self.sw_rmax = sw_rmax + self.smin_alpha = smin_alpha + + def serialize(self) -> dict: + dd = super().serialize() + dd.update( + { + "@class": "Model", + "@version": 2, + "type": "zbl", + "sw_rmin": self.sw_rmin, + "sw_rmax": self.sw_rmax, + "smin_alpha": self.smin_alpha, + } + ) + return dd + + @classmethod + def deserialize(cls, data) -> "DPZBLLinearEnergyAtomicModel": + data = data.copy() + check_version_compatibility(data.pop("@version", 1), 2, 2) + models = [ + BaseAtomicModel.get_class_by_type(model["type"]).deserialize(model) + for model in data["models"] + ] + data["dp_model"], data["zbl_model"] = models[0], models[1] + data.pop("@class", None) + data.pop("type", None) + return super().deserialize(data) + + def _compute_weight( + self, + extended_coord: np.ndarray, + extended_atype: np.ndarray, + nlists_: list[np.ndarray], + ) -> list[np.ndarray]: + """ZBL weight. + + Returns + ------- + list[np.ndarray] + the atomic ZBL weight for interpolation. (nframes, nloc, 1) + """ + assert ( + self.sw_rmax > self.sw_rmin + ), "The upper boundary `sw_rmax` must be greater than the lower boundary `sw_rmin`." 
+ + xp = array_api_compat.array_namespace(extended_coord, extended_atype) + dp_nlist = nlists_[0] + zbl_nlist = nlists_[1] + + zbl_nnei = zbl_nlist.shape[-1] + dp_nnei = dp_nlist.shape[-1] + + # use the larger rr based on nlist + nlist_larger = zbl_nlist if zbl_nnei >= dp_nnei else dp_nlist + masked_nlist = xp.clip(nlist_larger, 0, None) + pairwise_rr = PairTabAtomicModel._get_pairwise_dist( + extended_coord, masked_nlist + ) + + numerator = xp.sum( + xp.where( + nlist_larger != -1, + pairwise_rr * xp.exp(-pairwise_rr / self.smin_alpha), + xp.zeros_like(nlist_larger), + ), + axis=-1, + ) # masked nnei will be zero, no need to handle + denominator = xp.sum( + xp.where( + nlist_larger != -1, + xp.exp(-pairwise_rr / self.smin_alpha), + xp.zeros_like(nlist_larger), + ), + axis=-1, + ) # handle masked nnei. + with np.errstate(divide="ignore", invalid="ignore"): + sigma = numerator / denominator + u = (sigma - self.sw_rmin) / (self.sw_rmax - self.sw_rmin) + coef = xp.zeros_like(u) + left_mask = sigma < self.sw_rmin + mid_mask = (self.sw_rmin <= sigma) & (sigma < self.sw_rmax) + right_mask = sigma >= self.sw_rmax + coef = xp.where(left_mask, xp.ones_like(coef), coef) + with np.errstate(invalid="ignore"): + smooth = -6 * u**5 + 15 * u**4 - 10 * u**3 + 1 + coef = xp.where(mid_mask, smooth, coef) + coef = xp.where(right_mask, xp.zeros_like(coef), coef) + # to handle masked atoms + coef = xp.where(sigma != 0, coef, xp.zeros_like(coef)) + self.zbl_weight = coef + return [1 - xp.expand_dims(coef, -1), xp.expand_dims(coef, -1)] diff --git a/deepmd/dpmodel/atomic_model/make_base_atomic_model.py b/deepmd/dpmodel/atomic_model/make_base_atomic_model.py new file mode 100644 index 0000000000..a4c38518a3 --- /dev/null +++ b/deepmd/dpmodel/atomic_model/make_base_atomic_model.py @@ -0,0 +1,243 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from abc import ( + ABC, + abstractmethod, +) +from typing import ( + Optional, +) + +from deepmd.dpmodel.output_def import ( + FittingOutputDef, 
def make_base_atomic_model(
    t_tensor,
    fwd_method_name: str = "forward_atomic",
):
    """Make the base class for the atomic model.

    Parameters
    ----------
    t_tensor
        The type of the tensor. used in the type hint.
    fwd_method_name
        Name of the forward method. For dpmodels, it should be "call".
        For torch models, it should be "forward".

    Returns
    -------
    type
        The generated abstract base class ``BAM`` whose forward method is
        exposed under the name ``fwd_method_name``.
    """

    class BAM(ABC, PluginVariant, make_plugin_registry("atomic model")):
        """Base Atomic Model provides the interfaces of an atomic model."""

        @abstractmethod
        def fitting_output_def(self) -> FittingOutputDef:
            """Get the output def of developer implemented atomic models."""
            pass

        def atomic_output_def(self) -> FittingOutputDef:
            """Get the output def of the atomic model.

            By default it is the same as FittingOutputDef, but it
            allows model level wrapper of the output defined by the developer.

            """
            return self.fitting_output_def()

        @abstractmethod
        def get_rcut(self) -> float:
            """Get the cut-off radius."""
            pass

        @abstractmethod
        def get_type_map(self) -> list[str]:
            """Get the type map."""
            pass

        def get_ntypes(self) -> int:
            """Get the number of atom types."""
            return len(self.get_type_map())

        @abstractmethod
        def get_sel(self) -> list[int]:
            """Returns the number of selected atoms for each type."""
            pass

        def get_nsel(self) -> int:
            """Returns the total number of selected neighboring atoms in the cut-off radius."""
            return sum(self.get_sel())

        def get_nnei(self) -> int:
            """Alias of :meth:`get_nsel`: the total number of selected neighboring atoms."""
            return self.get_nsel()

        @abstractmethod
        def get_dim_fparam(self) -> int:
            """Get the number (dimension) of frame parameters of this atomic model."""

        @abstractmethod
        def get_dim_aparam(self) -> int:
            """Get the number (dimension) of atomic parameters of this atomic model."""

        @abstractmethod
        def get_sel_type(self) -> list[int]:
            """Get the selected atom types of this model.

            Only atoms with selected atom types have atomic contribution
            to the result of the model.
            If returning an empty list, all atom types are selected.
            """

        @abstractmethod
        def is_aparam_nall(self) -> bool:
            """Check whether the shape of atomic parameters is (nframes, nall, ndim).

            If False, the shape is (nframes, nloc, ndim).
            """

        @abstractmethod
        def mixed_types(self) -> bool:
            """If true, the model
            1. assumes total number of atoms aligned across frames;
            2. uses a neighbor list that does not distinguish different atomic types.

            If false, the model
            1. assumes total number of atoms of each atom type aligned across frames;
            2. uses a neighbor list that distinguishes different atomic types.

            """
            pass

        @abstractmethod
        def has_message_passing(self) -> bool:
            """Returns whether the descriptor has message passing."""

        @abstractmethod
        def need_sorted_nlist_for_lower(self) -> bool:
            """Returns whether the descriptor needs sorted nlist when using `forward_lower`."""

        @abstractmethod
        def fwd(
            self,
            extended_coord: t_tensor,
            extended_atype: t_tensor,
            nlist: t_tensor,
            mapping: Optional[t_tensor] = None,
            fparam: Optional[t_tensor] = None,
            aparam: Optional[t_tensor] = None,
        ) -> dict[str, t_tensor]:
            """Forward pass of the atomic model.

            This method is re-exposed under the name `fwd_method_name` and
            removed from the class under the name `fwd` once the class body
            has been created.
            """
            pass

        @abstractmethod
        def serialize(self) -> dict:
            """Serialize the atomic model to a dict."""
            pass

        @classmethod
        @abstractmethod
        def deserialize(cls, data: dict):
            """Build an atomic model from the dict produced by `serialize`."""
            pass

        @abstractmethod
        def change_type_map(
            self, type_map: list[str], model_with_new_type_stat=None
        ) -> None:
            """Change the type related params according to the new `type_map`."""
            pass

        def enable_compression(
            self,
            min_nbor_dist: float,
            table_extrapolate: float = 5,
            table_stride_1: float = 0.01,
            table_stride_2: float = 0.1,
            check_frequency: int = -1,
        ) -> None:
            """Call descriptor enable_compression().

            Parameters
            ----------
            min_nbor_dist
                The nearest distance between atoms
            table_extrapolate
                The scale of model extrapolation
            table_stride_1
                The uniform stride of the first table
            table_stride_2
                The uniform stride of the second table
            check_frequency
                The overflow check frequency

            Raises
            ------
            NotImplementedError
                Always, unless a subclass overrides this method.
            """
            raise NotImplementedError("This atomic model doesn't support compression!")

        def make_atom_mask(
            self,
            atype: t_tensor,
        ) -> t_tensor:
            """The atoms with type < 0 are treated as virtual atoms,
            which serves as place-holders for multi-frame calculations
            with different number of atoms in different frames.

            Parameters
            ----------
            atype
                Atom types. >= 0 for real atoms <0 for virtual atoms.

            Returns
            -------
            mask
                True for real atoms and False for virtual atoms.

            """
            # supposed to be supported by all backends
            return atype >= 0

        def do_grad_r(
            self,
            var_name: Optional[str] = None,
        ) -> bool:
            """Tell if the output variable `var_name` is r_differentiable.
            if var_name is None, returns if any of the variable is r_differentiable.

            """
            if var_name is not None:
                return self.do_grad_(var_name, "r")
            return any(
                self.do_grad_(vv, "r") for vv in self.fitting_output_def().keys()
            )

        def do_grad_c(
            self,
            var_name: Optional[str] = None,
        ) -> bool:
            """Tell if the output variable `var_name` is c_differentiable.
            if var_name is None, returns if any of the variable is c_differentiable.

            """
            if var_name is not None:
                return self.do_grad_(var_name, "c")
            return any(
                self.do_grad_(vv, "c") for vv in self.fitting_output_def().keys()
            )

        def do_grad_(self, var_name: str, base: str) -> bool:
            """Tell if the output variable `var_name` is differentiable.

            `base` selects the kind of derivative: "r" reads the
            `r_differentiable` flag and "c" reads the `c_differentiable` flag
            of the output variable definition.
            """
            assert var_name is not None
            assert base in ["c", "r"]
            if base == "c":
                return self.fitting_output_def()[var_name].c_differentiable
            return self.fitting_output_def()[var_name].r_differentiable

    # Expose the forward method under the backend-specific name.
    # NOTE(review): ABCMeta collects abstract method names when the class body
    # is executed, i.e. before this rename, so subclasses are presumably not
    # forced to implement `fwd_method_name` -- confirm whether that is intended.
    setattr(BAM, fwd_method_name, BAM.fwd)
    delattr(BAM, "fwd")

    return BAM
def __init__(
    self,
    tab_file: Optional[str],
    rcut: float,
    sel: Union[int, list[int]],
    type_map: list[str],
    rcond: Optional[float] = None,
    atom_ener: Optional[list[float]] = None,
    **kwargs,
) -> None:
    """Set up the pairwise tabulation model.

    Parameters
    ----------
    tab_file : str or None
        The path to the tabulation file. When it is None, no table is read
        here and `tab_info` / `tab_data` are left as None.
    rcut : float
        The cutoff radius.
    sel : int or list[int]
        The maximum number of atoms in the cut-off radius. A list is summed
        into a single total.
    type_map : list[str]
        Mapping atom type to the name (str) of the type.
    rcond : float, optional
        Stored on the instance; not used in this constructor.
    atom_ener : list[float], optional
        Stored on the instance; not used in this constructor.
    **kwargs
        Forwarded to the base class constructor.

    Raises
    ------
    ValueError
        If the number of types in the table differs from `len(type_map)`.
    TypeError
        If `sel` is neither an int nor a list.
    """
    super().__init__(type_map, **kwargs)
    super().init_out_stat()
    self.tab_file = tab_file
    self.rcut = rcut
    self.type_map = type_map
    self.ntypes = len(type_map)
    self.rcond = rcond
    self.atom_ener = atom_ener

    self.tab = PairTab(self.tab_file, rcut=rcut)

    if self.tab_file is not None:
        tab_info, tab_data = self.tab.get()
        # the last two entries of tab_info are read as (nspline, ntypes)
        nspline, ntypes_tab = tab_info[-2:].astype(int)
        self.tab_info = tab_info
        self.tab_data = tab_data.reshape(ntypes_tab, ntypes_tab, nspline, 4)
        if self.ntypes != ntypes_tab:
            raise ValueError(
                "The `type_map` provided does not match the number of columns in the table."
            )
    else:
        self.tab_info, self.tab_data = None, None

    if isinstance(sel, int):
        self.sel = sel
    elif isinstance(sel, list):
        self.sel = sum(sel)
    else:
        raise TypeError("sel must be int or list[int]")
def forward_atomic(
    self,
    extended_coord,
    extended_atype,
    nlist,
    mapping: Optional[np.ndarray] = None,
    fparam: Optional[np.ndarray] = None,
    aparam: Optional[np.ndarray] = None,
) -> dict[str, np.ndarray]:
    """Evaluate the tabulated pairwise energy of every local atom.

    Parameters
    ----------
    extended_coord
        Extended coordinates; reshaped to (nframes, -1, 3) before use.
    extended_atype
        Extended atom types, indexed per frame.
    nlist
        Neighbor list of shape (nframes, nloc, nnei); -1 marks a missing neighbor.
    mapping, fparam, aparam
        Accepted for interface compatibility; not used by this model.

    Returns
    -------
    dict[str, np.ndarray]
        ``{"energy": ...}`` with the atomic energy of shape (nframes, nloc, 1).
    """
    xp = array_api_compat.array_namespace(extended_coord, extended_atype, nlist)
    n_frames, n_local, _ = nlist.shape
    coords = xp.reshape(extended_coord, (n_frames, -1, 3))

    # multiplying by the validity mask turns every -1 entry into index 0
    is_neighbor = nlist >= 0
    safe_nlist = nlist * is_neighbor

    # (nframes, nloc)
    center_types = extended_atype[:, :n_local]
    # (nframes, nloc, nnei), index type is int64.
    frame_index = xp.arange(extended_atype.shape[0], dtype=xp.int64)[:, None, None]
    neighbor_types = extended_atype[frame_index, safe_nlist]

    # (nframes, nloc, nnei)
    distances = self._get_pairwise_dist(coords, safe_nlist)

    pair_energy = self._pair_tabulated_inter(
        nlist, center_types, neighbor_types, distances
    )
    # drop the contribution of missing neighbors before reducing over nnei
    pair_energy = xp.where(nlist != -1, pair_energy, xp.zeros_like(pair_energy))
    per_atom = 0.5 * xp.sum(pair_energy, axis=-1)

    return {"energy": xp.reshape(per_atom, (n_frames, n_local, 1))}
@staticmethod
def _get_pairwise_dist(coords: np.ndarray, nlist: np.ndarray) -> np.ndarray:
    """Compute the distance from every local atom to each of its listed neighbors.

    Parameters
    ----------
    coords : np.ndarray
        The coordinate of the atoms, shape of (nframes, nall, 3).
    nlist
        The masked nlist, shape of (nframes, nloc, nnei).

    Returns
    -------
    np.ndarray
        The pairwise distance between the atoms (nframes, nloc, nnei).
    """
    xp = array_api_compat.array_namespace(coords, nlist)
    n_frames, n_local = nlist.shape[0], nlist.shape[1]
    # index type is int64
    frame_index = xp.arange(n_frames, dtype=xp.int64)[:, None, None]
    # gather the neighbor coordinates frame by frame
    neighbors = coords[frame_index, nlist]
    # the first nloc atoms of every frame are the local atoms
    centers = coords[:, :n_local, :]
    displacement = centers[:, :, None, :] - neighbors
    squared_norm = xp.sum(xp.power(displacement, 2), axis=-1)
    return safe_for_sqrt(squared_norm)
(nframes, nloc) + j_type : np.ndarray + The integer representation of atom type for all neighbour atoms of all local atoms for all frames. (nframes, nloc, nnei) + idx : np.ndarray + The index of the spline coefficient. (nframes, nloc, nnei) + tab_data : np.ndarray + The table storing all the spline coefficient. (ntype, ntype, nspline, 4) + nspline : int + The number of splines in the table. + + Returns + ------- + np.ndarray + The spline coefficient. (nframes, nloc, nnei, 4), shape may be squeezed. + """ + xp = array_api_compat.array_namespace(i_type, j_type, idx, tab_data) + # (nframes, nloc, nnei) + expanded_i_type = xp.broadcast_to( + i_type[:, :, xp.newaxis], + (i_type.shape[0], i_type.shape[1], j_type.shape[-1]), + ) + + # (nframes, nloc, nnei, nspline, 4) + expanded_tab_data = tab_data[expanded_i_type, j_type] + + # (nframes, nloc, nnei, 1, 4) + expanded_idx = xp.broadcast_to( + idx[..., xp.newaxis, xp.newaxis], (*idx.shape, 1, 4) + ) + clipped_indices = xp.clip(expanded_idx, 0, nspline - 1).astype(int) + + # (nframes, nloc, nnei, 4) + final_coef = xp.squeeze( + xp_take_along_axis(expanded_tab_data, clipped_indices, 3) + ) + + # when the spline idx is beyond the table, all spline coefficients are set to `0`, and the resulting ener corresponding to the idx is also `0`. + final_coef = xp.where( + expanded_idx.squeeze() > nspline, xp.zeros_like(final_coef), final_coef + ) + return final_coef + + @staticmethod + def _calculate_ener(coef: np.ndarray, uu: np.ndarray) -> np.ndarray: + """Calculate energy using spline coeeficients. + + Parameters + ---------- + coef : np.ndarray + The spline coefficients. (nframes, nloc, nnei, 4) + uu : np.ndarray + The atom displancemnt used in interpolation and extrapolation (nframes, nloc, nnei) + + Returns + ------- + np.ndarray + The atomic energy for all local atoms for all frames. 
(nframes, nloc, nnei) + """ + a3, a2, a1, a0 = coef[..., 0], coef[..., 1], coef[..., 2], coef[..., 3] + etmp = (a3 * uu + a2) * uu + a1 # this should be elementwise operations. + ener = etmp * uu + a0 # this energy has the extrapolated value when rcut > rmax + return ener + + def get_dim_fparam(self) -> int: + """Get the number (dimension) of frame parameters of this atomic model.""" + return 0 + + def get_dim_aparam(self) -> int: + """Get the number (dimension) of atomic parameters of this atomic model.""" + return 0 + + def get_sel_type(self) -> list[int]: + """Get the selected atom types of this model. + + Only atoms with selected atom types have atomic contribution + to the result of the model. + If returning an empty list, all atom types are selected. + """ + return [] + + def is_aparam_nall(self) -> bool: + """Check whether the shape of atomic parameters is (nframes, nall, ndim). + + If False, the shape is (nframes, nloc, ndim). + """ + return False diff --git a/deepmd/dpmodel/atomic_model/polar_atomic_model.py b/deepmd/dpmodel/atomic_model/polar_atomic_model.py new file mode 100644 index 0000000000..bc7860491c --- /dev/null +++ b/deepmd/dpmodel/atomic_model/polar_atomic_model.py @@ -0,0 +1,64 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later + +import array_api_compat +import numpy as np + +from deepmd.dpmodel.fitting.polarizability_fitting import ( + PolarFitting, +) + +from .dp_atomic_model import ( + DPAtomicModel, +) + + +class DPPolarAtomicModel(DPAtomicModel): + def __init__(self, descriptor, fitting, type_map, **kwargs): + if not isinstance(fitting, PolarFitting): + raise TypeError( + "fitting must be an instance of PolarFitting for DPPolarAtomicModel" + ) + super().__init__(descriptor, fitting, type_map, **kwargs) + + def apply_out_stat( + self, + ret: dict[str, np.ndarray], + atype: np.ndarray, + ): + """Apply the stat to each atomic output. + + Parameters + ---------- + ret + The returned dict by the forward_atomic method + atype + The atom types. 
class DPPropertyAtomicModel(DPAtomicModel):
    """Atomic model whose fitting network must be a `PropertyFittingNet`."""

    def __init__(self, descriptor, fitting, type_map, **kwargs):
        """Check the fitting type, then defer to `DPAtomicModel.__init__`.

        Raises
        ------
        TypeError
            If `fitting` is not an instance of `PropertyFittingNet`.
        """
        fitting_is_supported = isinstance(fitting, PropertyFittingNet)
        if fitting_is_supported:
            super().__init__(descriptor, fitting, type_map, **kwargs)
            return
        raise TypeError(
            "fitting must be an instance of PropertyFittingNet for DPPropertyAtomicModel"
        )
def get_xp_precision(
    xp: Any,
    precision: str,
):
    """Get the precision from the API compatible namespace.

    Parameters
    ----------
    xp
        The array namespace the dtype attribute is looked up on.
    precision
        Name of the precision, e.g. "float32", "single", "int64", "bool",
        "default", "global" or "bfloat16".

    Raises
    ------
    ValueError
        If `precision` is not one of the supported names.
    """
    # aliases that resolve to an attribute of the namespace itself;
    # the attribute is only looked up for the matching entry
    namespace_dtypes = (
        (("float16", "half"), "float16"),
        (("float32", "single"), "float32"),
        (("float64", "double"), "float64"),
        (("int32",), "int32"),
        (("int64",), "int64"),
    )
    for aliases, attr_name in namespace_dtypes:
        if precision in aliases:
            return getattr(xp, attr_name)

    if precision == "bool":
        # the builtin type is returned, not a namespace attribute
        return bool
    if precision == "default":
        resolved = RESERVED_PRECISON_DICT[PRECISION_DICT[precision]]
        return get_xp_precision(xp, resolved)
    if precision == "global":
        resolved = RESERVED_PRECISON_DICT[GLOBAL_NP_FLOAT_PRECISION]
        return get_xp_precision(xp, resolved)
    if precision == "bfloat16":
        return ml_dtypes.bfloat16
    raise ValueError(f"unsupported precision {precision} for {xp}")
def to_numpy_array(x: Any) -> Optional[np.ndarray]:
    """Convert an array to a NumPy array.

    Parameters
    ----------
    x : Any
        The array to be converted. `None` is passed through unchanged.

    Returns
    -------
    Optional[np.ndarray]
        The NumPy array, or `None` when `x` is `None`.
    """
    if x is not None:
        try:
            # asarray is not within Array API standard, so may fail
            converted = np.asarray(x)
        except (ValueError, AttributeError):
            namespace = array_api_compat.array_namespace(x)
            # to fix BufferError: Cannot export readonly array since signalling readonly is unsupported by DLPack.
            writable_copy = namespace.asarray(x, copy=True)
            converted = np.from_dlpack(writable_copy)
        return converted
    return None
@overload
def safe_cast_array(
    input: np.ndarray, from_precision: str, to_precision: str
) -> np.ndarray: ...
@overload
def safe_cast_array(input: None, from_precision: str, to_precision: str) -> None: ...
def safe_cast_array(
    input: Optional[np.ndarray], from_precision: str, to_precision: str
) -> Optional[np.ndarray]:
    """Cast an array to `to_precision` only if its dtype is `from_precision`.

    Anything that is not an array API object, or whose dtype differs from
    `from_precision`, is returned untouched.

    Array API is supported.

    Parameters
    ----------
    input : np.ndarray or None
        Input array
    from_precision : str
        Name of the precision the array is expected to have
    to_precision : str
        Name of the precision the array is cast to

    Returns
    -------
    np.ndarray or None
        The cast array, or `input` itself when no cast applies
    """
    # non-array inputs (None, Python scalars, ...) pass straight through
    if not array_api_compat.is_array_api_obj(input):
        return input
    xp = array_api_compat.array_namespace(input)
    expected_dtype = get_xp_precision(xp, from_precision)
    # arrays of any other dtype (e.g. integers) are left alone
    if not (input.dtype == expected_dtype):
        return input
    return xp.astype(input, get_xp_precision(xp, to_precision))
b/deepmd/dpmodel/descriptor/descriptor.py new file mode 100644 index 0000000000..443a2a66f1 --- /dev/null +++ b/deepmd/dpmodel/descriptor/descriptor.py @@ -0,0 +1,171 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import logging +from abc import ( + ABC, + abstractmethod, +) +from typing import ( + Callable, + NoReturn, + Optional, + Union, +) + +import numpy as np + +from deepmd.utils.env_mat_stat import ( + StatItem, +) +from deepmd.utils.path import ( + DPPath, +) +from deepmd.utils.plugin import ( + make_plugin_registry, +) + +log = logging.getLogger(__name__) + + +class DescriptorBlock(ABC, make_plugin_registry("DescriptorBlock")): + """The building block of descriptor. + Given the input descriptor, provide with the atomic coordinates, + atomic types and neighbor list, calculate the new descriptor. + """ + + local_cluster = False + + def __new__(cls, *args, **kwargs): + if cls is DescriptorBlock: + try: + descrpt_type = kwargs["type"] + except KeyError as e: + raise KeyError( + "the type of DescriptorBlock should be set by `type`" + ) from e + cls = cls.get_class_by_type(descrpt_type) + return super().__new__(cls) + + @abstractmethod + def get_rcut(self) -> float: + """Returns the cut-off radius.""" + pass + + @abstractmethod + def get_nsel(self) -> int: + """Returns the number of selected atoms in the cut-off radius.""" + pass + + @abstractmethod + def get_sel(self) -> list[int]: + """Returns the number of selected atoms for each type.""" + pass + + @abstractmethod + def get_ntypes(self) -> int: + """Returns the number of element types.""" + pass + + @abstractmethod + def get_dim_out(self) -> int: + """Returns the output dimension.""" + pass + + @abstractmethod + def get_dim_in(self) -> int: + """Returns the input dimension.""" + pass + + @abstractmethod + def get_dim_emb(self) -> int: + """Returns the embedding dimension.""" + pass + + def compute_input_stats( + self, + merged: Union[Callable[[], list[dict]], list[dict]], + path: Optional[DPPath] = None, 
+ ) -> NoReturn: + """ + Compute the input statistics (e.g. mean and stddev) for the descriptors from packed data. + + Parameters + ---------- + merged : Union[Callable[[], list[dict]], list[dict]] + - list[dict]: A list of data samples from various data systems. + Each element, `merged[i]`, is a data dictionary containing `keys`: `torch.Tensor` + originating from the `i`-th data system. + - Callable[[], list[dict]]: A lazy function that returns data samples in the above format + only when needed. Since the sampling process can be slow and memory-intensive, + the lazy function helps by only sampling once. + path : Optional[DPPath] + The path to the stat file. + + """ + raise NotImplementedError + + def get_stats(self) -> dict[str, StatItem]: + """Get the statistics of the descriptor.""" + raise NotImplementedError + + def share_params(self, base_class, shared_level, resume=False) -> NoReturn: + """ + Share the parameters of self to the base_class with shared_level during multitask training. + If not start from checkpoint (resume is False), + some separated parameters (e.g. mean and stddev) will be re-calculated across different classes. + """ + raise NotImplementedError + + @abstractmethod + def call( + self, + nlist: np.ndarray, + extended_coord: np.ndarray, + extended_atype: np.ndarray, + extended_atype_embd: Optional[np.ndarray] = None, + mapping: Optional[np.ndarray] = None, + type_embedding: Optional[np.ndarray] = None, + ): + """Calculate DescriptorBlock.""" + pass + + @abstractmethod + def has_message_passing(self) -> bool: + """Returns whether the descriptor block has message passing.""" + + @abstractmethod + def need_sorted_nlist_for_lower(self) -> bool: + """Returns whether the descriptor block needs sorted nlist when using `forward_lower`.""" + + +def extend_descrpt_stat(des, type_map, des_with_stat=None) -> None: + r""" + Extend the statistics of a descriptor block with types from newly provided `type_map`. 
def extend_descrpt_stat(des, type_map, des_with_stat=None) -> None:
    r"""
    Append statistics for the types in `type_map` to a descriptor block.

    The type axis (axis 0) of ``des["davg"]`` and ``des["dstd"]`` grows to
    `len(old_type_map) + len(type_map)`, where `old_type_map` is the type map
    currently held by `des`. Indices produced by `get_index_between_two_maps()`
    can then pick the right rows: non-negative indices
    `0 .. len(old_type_map) - 1` address the old statistics, negative indices
    `-len(type_map) .. -1` address the newly appended ones.

    Parameters
    ----------
    des : DescriptorBlock
        The descriptor block to be extended; modified in place through
        item assignment of ``"davg"`` and ``"dstd"``.
    type_map : list[str]
        The name of each type of atoms to be extended.
    des_with_stat : DescriptorBlock, Optional
        A block providing ``"davg"``/``"dstd"`` for the types in `type_map`.
        If None, zeros (davg) and ones (dstd) are appended instead.

    """
    old_avg = des["davg"]
    old_std = des["dstd"]
    if des_with_stat is None:
        # default statistics: one row per new type, trailing dims as in davg
        pad_shape = (len(type_map), *old_avg.shape[1:])
        new_avg = np.zeros(pad_shape, dtype=old_avg.dtype)
        new_std = np.ones(pad_shape, dtype=old_std.dtype)
    else:
        new_avg = des_with_stat["davg"]
        new_std = des_with_stat["dstd"]
    des["davg"] = np.concatenate((old_avg, new_avg), axis=0)
    des["dstd"] = np.concatenate((old_std, new_std), axis=0)
def np_softmax(x, axis=-1):
    """Softmax of `x` along `axis`, with NaN entries treated as zero.

    The maximum along `axis` is subtracted before exponentiation.
    """
    xp = array_api_compat.array_namespace(x)
    # replace NaNs by 0 (stands in for nan_to_num) to avoid value warnings
    nan_free = xp.where(xp.isnan(x), xp.zeros_like(x), x)
    peak = xp.max(nan_free, axis=axis, keepdims=True)
    weights = xp.exp(nan_free - peak)
    total = xp.sum(weights, axis=axis, keepdims=True)
    return weights / total


def np_normalize(x, axis=-1):
    """Divide `x` by its vector norm taken along `axis`."""
    xp = array_api_compat.array_namespace(x)
    length = xp.linalg.vector_norm(x, axis=axis, keepdims=True)
    return x / length
+ Note that we obtain :math:`\mathcal{G}^i` using the type embedding method by default in this descriptor. + + To perform the self-attention mechanism, the queries :math:`\mathcal{Q}^{i,l} \in \mathbb{R}^{N_c\times d_k}`, + keys :math:`\mathcal{K}^{i,l} \in \mathbb{R}^{N_c\times d_k}`, + and values :math:`\mathcal{V}^{i,l} \in \mathbb{R}^{N_c\times d_v}` are first obtained: + + .. math:: + \left(\mathcal{Q}^{i,l}\right)_{j}=Q_{l}\left(\left(\mathcal{G}^{i,l-1}\right)_{j}\right), + + .. math:: + \left(\mathcal{K}^{i,l}\right)_{j}=K_{l}\left(\left(\mathcal{G}^{i,l-1}\right)_{j}\right), + + .. math:: + \left(\mathcal{V}^{i,l}\right)_{j}=V_{l}\left(\left(\mathcal{G}^{i,l-1}\right)_{j}\right), + + where :math:`Q_{l}`, :math:`K_{l}`, :math:`V_{l}` represent three trainable linear transformations + that output the queries and keys of dimension :math:`d_k` and values of dimension :math:`d_v`, and :math:`l` + is the index of the attention layer. + The input embedding matrix to the attention layers, denoted by :math:`\mathcal{G}^{i,0}`, + is chosen as the two-body embedding matrix. + + Then the scaled dot-product attention method is adopted: + + .. math:: + A(\mathcal{Q}^{i,l}, \mathcal{K}^{i,l}, \mathcal{V}^{i,l}, \mathcal{R}^{i,l})=\varphi\left(\mathcal{Q}^{i,l}, \mathcal{K}^{i,l},\mathcal{R}^{i,l}\right)\mathcal{V}^{i,l}, + + where :math:`\varphi\left(\mathcal{Q}^{i,l}, \mathcal{K}^{i,l},\mathcal{R}^{i,l}\right) \in \mathbb{R}^{N_c\times N_c}` is attention weights. + In the original attention method, + one typically has :math:`\varphi\left(\mathcal{Q}^{i,l}, \mathcal{K}^{i,l}\right)=\mathrm{softmax}\left(\frac{\mathcal{Q}^{i,l} (\mathcal{K}^{i,l})^{T}}{\sqrt{d_{k}}}\right)`, + with :math:`\sqrt{d_{k}}` being the normalization temperature. + This is slightly modified to incorporate the angular information: + + .. 
 math:: + \varphi\left(\mathcal{Q}^{i,l}, \mathcal{K}^{i,l},\mathcal{R}^{i,l}\right) = \mathrm{softmax}\left(\frac{\mathcal{Q}^{i,l} (\mathcal{K}^{i,l})^{T}}{\sqrt{d_{k}}}\right) \odot \hat{\mathcal{R}}^{i}(\hat{\mathcal{R}}^{i})^{T}, + + where :math:`\hat{\mathcal{R}}^{i} \in \mathbb{R}^{N_c\times 3}` denotes normalized relative coordinates, + :math:`\hat{\mathcal{R}}^{i}_{j} = \frac{\boldsymbol{r}_{ij}}{\lVert \boldsymbol{r}_{ij} \rVert}` + and :math:`\odot` means element-wise multiplication. + + Then layer normalization is added in a residual way to finally obtain the self-attention local embedding matrix + :math:`\hat{\mathcal{G}}^{i} = \mathcal{G}^{i,L_a}` after :math:`L_a` attention layers:[^1] + + .. math:: + \mathcal{G}^{i,l} = \mathcal{G}^{i,l-1} + \mathrm{LayerNorm}(A(\mathcal{Q}^{i,l}, \mathcal{K}^{i,l}, \mathcal{V}^{i,l}, \mathcal{R}^{i,l})). + + Parameters + ---------- + rcut: float + The cut-off radius :math:`r_c` + rcut_smth: float + From where the environment matrix should be smoothed :math:`r_s` + sel : list[int], int + list[int]: sel[i] specifies the maximum number of type i atoms in the cut-off radius + int: the total maximum number of atoms in the cut-off radius + ntypes : int + Number of element types + neuron : list[int] + Number of neurons in each hidden layer of the embedding net :math:`\mathcal{N}` + axis_neuron: int + Number of the axis neuron :math:`M_2` (number of columns of the sub-matrix of the embedding matrix) + tebd_dim: int + Dimension of the type embedding + tebd_input_mode: str + The input mode of the type embedding. Supported modes are ["concat", "strip"]. + - "concat": Concatenate the type embedding with the smoothed radial information as the union input for the embedding network. + - "strip": Use a separated embedding network for the type embedding and combine the output with the radial embedding network output. 
+ resnet_dt: bool + Time-step `dt` in the resnet construction: + y = x + dt * \phi (Wx + b) + trainable: bool + If the weights of this descriptors are trainable. + trainable_ln: bool + Whether to use trainable shift and scale weights in layer normalization. + ln_eps: float, Optional + The epsilon value for layer normalization. + type_one_side: bool + If 'False', type embeddings of both neighbor and central atoms are considered. + If 'True', only type embeddings of neighbor atoms are considered. + Default is 'False'. + attn: int + Hidden dimension of the attention vectors + attn_layer: int + Number of attention layers + attn_dotr: bool + If dot the angular gate to the attention weights + attn_mask: bool + (Only support False to keep consistent with other backend references.) + (Not used in this version. True option is not implemented.) + If mask the diagonal of attention weights + exclude_types : list[list[int]] + The excluded pairs of types which have no interaction with each other. + For example, `[[0, 1]]` means no interaction between type 0 and type 1. + env_protection: float + Protection parameter to prevent division by zero errors during environment matrix calculations. + set_davg_zero: bool + Set the shift of embedding net input to zero. + activation_function: str + The activation function in the embedding net. Supported options are |ACTIVATION_FN| + precision: str + The precision of the embedding net parameters. Supported options are |PRECISION| + scaling_factor: float + The scaling factor of normalization in calculations of attention weights. + If `temperature` is None, the scaling of attention weights is (N_dim * scaling_factor)**0.5 + normalize: bool + Whether to normalize the hidden vectors in attention weights calculation. + temperature: float + If not None, the scaling of attention weights is `temperature` itself. + smooth_type_embedding: bool + Whether to use smooth process in attention weights calculation. 
+ concat_output_tebd: bool + Whether to concat type embedding at the output of the descriptor. + stripped_type_embedding: bool, Optional + (Deprecated, kept only for compatibility.) + Whether to strip the type embedding into a separate embedding network. + Setting this parameter to `True` is equivalent to setting `tebd_input_mode` to 'strip'. + Setting it to `False` is equivalent to setting `tebd_input_mode` to 'concat'. + The default value is `None`, which means the `tebd_input_mode` setting will be used instead. + use_econf_tebd: bool, Optional + Whether to use electronic configuration type embedding. + use_tebd_bias : bool, Optional + Whether to use bias in the type embedding layer. + type_map: list[str], Optional + A list of strings. Give the name to each type of atoms. + spin + (Only support None to keep consistent with other backend references.) + (Not used in this version. Not-none option is not implemented.) + The old implementation of deepspin. + + Limitations + ----------- + The current implementation does not support the following deprecated features + 1. spin is not None + 2. attn_mask == True + + References + ---------- + .. [1] Duo Zhang, Hangrui Bi, Fu-Zhi Dai, Wanrun Jiang, Linfeng Zhang, and Han Wang. 2022. + DPA-1: Pretraining of Attention-based Deep Potential Model for Molecular Simulation. + arXiv preprint arXiv:2208.08236. 
    def __init__(
        self,
        rcut: float,
        rcut_smth: float,
        sel: Union[list[int], int],
        ntypes: int,
        neuron: list[int] = [25, 50, 100],
        axis_neuron: int = 8,
        tebd_dim: int = 8,
        tebd_input_mode: str = "concat",
        resnet_dt: bool = False,
        trainable: bool = True,
        type_one_side: bool = False,
        attn: int = 128,
        attn_layer: int = 2,
        attn_dotr: bool = True,
        attn_mask: bool = False,
        exclude_types: list[tuple[int, int]] = [],
        env_protection: float = 0.0,
        set_davg_zero: bool = False,
        activation_function: str = "tanh",
        precision: str = DEFAULT_PRECISION,
        scaling_factor=1.0,
        normalize: bool = True,
        temperature: Optional[float] = None,
        trainable_ln: bool = True,
        ln_eps: Optional[float] = 1e-5,
        smooth_type_embedding: bool = True,
        concat_output_tebd: bool = True,
        spin: Optional[Any] = None,
        stripped_type_embedding: Optional[bool] = None,
        use_econf_tebd: bool = False,
        use_tebd_bias: bool = False,
        type_map: Optional[list[str]] = None,
        # consistent with argcheck; child seeds derived from it via
        # `child_seed` are handed to the sub-components built below
        seed: Optional[Union[int, list[int]]] = None,
    ) -> None:
        """Assemble the descriptor from a `DescrptBlockSeAtten` and a `TypeEmbedNet`.

        Raises
        ------
        NotImplementedError
            If `spin` is not None or `attn_mask` is True.
        """
        # Ensure compatibility with the deprecated stripped_type_embedding option.
        if stripped_type_embedding is not None:
            # Use the user-set stripped_type_embedding parameter first;
            # it overrides whatever `tebd_input_mode` was passed.
            tebd_input_mode = "strip" if stripped_type_embedding else "concat"
        if spin is not None:
            raise NotImplementedError("old implementation of spin is not supported.")
        if attn_mask:
            raise NotImplementedError(
                "old implementation of attn_mask is not supported."
            )
        # to keep consistent with the default value in this backend
        if ln_eps is None:
            ln_eps = 1e-5

        # attention block; `attn_mask` is hard-wired to False because a True
        # value has already been rejected above
        self.se_atten = DescrptBlockSeAtten(
            rcut,
            rcut_smth,
            sel,
            ntypes,
            neuron=neuron,
            axis_neuron=axis_neuron,
            tebd_dim=tebd_dim,
            tebd_input_mode=tebd_input_mode,
            set_davg_zero=set_davg_zero,
            attn=attn,
            attn_layer=attn_layer,
            attn_dotr=attn_dotr,
            attn_mask=False,
            activation_function=activation_function,
            precision=precision,
            resnet_dt=resnet_dt,
            scaling_factor=scaling_factor,
            normalize=normalize,
            temperature=temperature,
            smooth=smooth_type_embedding,
            type_one_side=type_one_side,
            exclude_types=exclude_types,
            env_protection=env_protection,
            trainable_ln=trainable_ln,
            ln_eps=ln_eps,
            seed=child_seed(seed, 0),
        )
        self.use_econf_tebd = use_econf_tebd
        self.use_tebd_bias = use_tebd_bias
        self.type_map = type_map
        # type embedding net with a single layer of width `tebd_dim`
        self.type_embedding = TypeEmbedNet(
            ntypes=ntypes,
            neuron=[tebd_dim],
            padding=True,
            activation_function="Linear",
            precision=precision,
            use_econf_tebd=use_econf_tebd,
            use_tebd_bias=use_tebd_bias,
            type_map=type_map,
            seed=child_seed(seed, 1),
        )
        self.tebd_dim = tebd_dim
        self.concat_output_tebd = concat_output_tebd
        self.trainable = trainable
        self.precision = precision
return self.type_map + + def get_dim_out(self) -> int: + """Returns the output dimension.""" + ret = self.se_atten.get_dim_out() + if self.concat_output_tebd: + ret += self.tebd_dim + return ret + + def get_dim_emb(self) -> int: + return self.se_atten.dim_emb + + def mixed_types(self) -> bool: + """If true, the descriptor + 1. assumes total number of atoms aligned across frames; + 2. requires a neighbor list that does not distinguish different atomic types. + + If false, the descriptor + 1. assumes total number of atoms of each atom type aligned across frames; + 2. requires a neighbor list that distinguishes different atomic types. + + """ + return self.se_atten.mixed_types() + + def has_message_passing(self) -> bool: + """Returns whether the descriptor has message passing.""" + return self.se_atten.has_message_passing() + + def need_sorted_nlist_for_lower(self) -> bool: + """Returns whether the descriptor needs sorted nlist when using `forward_lower`.""" + return self.se_atten.need_sorted_nlist_for_lower() + + def get_env_protection(self) -> float: + """Returns the protection of building environment matrix.""" + return self.se_atten.get_env_protection() + + def share_params(self, base_class, shared_level, resume=False) -> NoReturn: + """ + Share the parameters of self to the base_class with shared_level during multitask training. + If not start from checkpoint (resume is False), + some separated parameters (e.g. mean and stddev) will be re-calculated across different classes. 
+ """ + raise NotImplementedError + + @property + def dim_out(self): + return self.get_dim_out() + + @property + def dim_emb(self): + return self.get_dim_emb() + + def compute_input_stats( + self, merged: list[dict], path: Optional[DPPath] = None + ) -> NoReturn: + """Update mean and stddev for descriptor elements.""" + raise NotImplementedError + + def set_stat_mean_and_stddev( + self, + mean: np.ndarray, + stddev: np.ndarray, + ) -> None: + """Update mean and stddev for descriptor.""" + self.se_atten.mean = mean + self.se_atten.stddev = stddev + + def get_stat_mean_and_stddev(self) -> tuple[np.ndarray, np.ndarray]: + """Get mean and stddev for descriptor.""" + return self.se_atten.mean, self.se_atten.stddev + + def change_type_map( + self, type_map: list[str], model_with_new_type_stat=None + ) -> None: + """Change the type related params to new ones, according to `type_map` and the original one in the model. + If there are new types in `type_map`, statistics will be updated accordingly to `model_with_new_type_stat` for these new types. + """ + assert ( + self.type_map is not None + ), "'type_map' must be defined when performing type changing!" + remap_index, has_new_type = get_index_between_two_maps(self.type_map, type_map) + obj = self.se_atten + obj.ntypes = len(type_map) + self.type_map = type_map + self.type_embedding.change_type_map(type_map=type_map) + obj.reinit_exclude(map_pair_exclude_types(obj.exclude_types, remap_index)) + if has_new_type: + # the avg and std of new types need to be updated + extend_descrpt_stat( + obj, + type_map, + des_with_stat=model_with_new_type_stat.se_atten + if model_with_new_type_stat is not None + else None, + ) + obj["davg"] = obj["davg"][remap_index] + obj["dstd"] = obj["dstd"][remap_index] + + @cast_precision + def call( + self, + coord_ext, + atype_ext, + nlist, + mapping: Optional[np.ndarray] = None, + ): + """Compute the descriptor. + + Parameters + ---------- + coord_ext + The extended coordinates of atoms. 
    @cast_precision
    def call(
        self,
        coord_ext,
        atype_ext,
        nlist,
        mapping: Optional[np.ndarray] = None,
    ):
        """Compute the descriptor.

        Parameters
        ----------
        coord_ext
            The extended coordinates of atoms. shape: nf x (nallx3)
        atype_ext
            The extended atom types. shape: nf x nall
        nlist
            The neighbor list. shape: nf x nloc x nnei
        mapping
            The index mapping from extended to local region. not used by this descriptor.

        Returns
        -------
        descriptor
            The descriptor. shape: nf x nloc x (ng x axis_neuron), or
            nf x nloc x (ng x axis_neuron + tebd_dim) when
            `concat_output_tebd` is set.
        gr
            The rotationally equivariant and permutationally invariant single particle
            representation. shape: nf x nloc x ng x 3
        g2
            The rotationally invariant pair-particle representation.
            this descriptor returns None
        h2
            The rotationally equivariant pair-particle representation.
            this descriptor returns None
        sw
            The smooth switch function.
        """
        # the mapping is deliberately discarded; None is forwarded to the block
        del mapping
        xp = array_api_compat.array_namespace(coord_ext, atype_ext, nlist)
        nf, nloc, nnei = nlist.shape
        # number of extended atoms, derived from the flattened coordinates
        nall = xp.reshape(coord_ext, (nf, -1)).shape[1] // 3
        type_embedding = self.type_embedding.call()
        # nf x nall x tebd_dim
        atype_embd_ext = xp.reshape(
            xp.take(type_embedding, xp.reshape(atype_ext, [-1]), axis=0),
            (nf, nall, self.tebd_dim),
        )
        # nf x nloc x tebd_dim (local atoms are the first nloc extended atoms)
        atype_embd = atype_embd_ext[:, :nloc, :]
        # the pair representations returned by the block (g2, h2) are not
        # propagated; None is returned in their place
        grrg, g2, h2, rot_mat, sw = self.se_atten(
            nlist,
            coord_ext,
            atype_ext,
            atype_embd_ext,
            mapping=None,
            type_embedding=type_embedding,
        )
        # nf x nloc x (ng x ng1 + tebd_dim)
        if self.concat_output_tebd:
            grrg = xp.concat(
                [grrg, xp.reshape(atype_embd, (nf, nloc, self.tebd_dim))], axis=-1
            )
        return grrg, rot_mat, None, None, sw
"activation_function": obj.activation_function, + "resnet_dt": obj.resnet_dt, + "scaling_factor": obj.scaling_factor, + "normalize": obj.normalize, + "temperature": obj.temperature, + "trainable_ln": obj.trainable_ln, + "ln_eps": obj.ln_eps, + "smooth_type_embedding": obj.smooth, + "type_one_side": obj.type_one_side, + "concat_output_tebd": self.concat_output_tebd, + "use_econf_tebd": self.use_econf_tebd, + "use_tebd_bias": self.use_tebd_bias, + "type_map": self.type_map, + # make deterministic + "precision": np.dtype(PRECISION_DICT[obj.precision]).name, + "embeddings": obj.embeddings.serialize(), + "attention_layers": obj.dpa1_attention.serialize(), + "env_mat": obj.env_mat.serialize(), + "type_embedding": self.type_embedding.serialize(), + "exclude_types": obj.exclude_types, + "env_protection": obj.env_protection, + "@variables": { + "davg": to_numpy_array(obj["davg"]), + "dstd": to_numpy_array(obj["dstd"]), + }, + ## to be updated when the options are supported. + "trainable": self.trainable, + "spin": None, + } + if obj.tebd_input_mode in ["strip"]: + data.update({"embeddings_strip": obj.embeddings_strip.serialize()}) + return data + + @classmethod + def deserialize(cls, data: dict) -> "DescrptDPA1": + """Deserialize from dict.""" + data = data.copy() + check_version_compatibility(data.pop("@version"), 2, 1) + data.pop("@class") + data.pop("type") + variables = data.pop("@variables") + embeddings = data.pop("embeddings") + type_embedding = data.pop("type_embedding") + attention_layers = data.pop("attention_layers") + env_mat = data.pop("env_mat") + tebd_input_mode = data["tebd_input_mode"] + if tebd_input_mode in ["strip"]: + embeddings_strip = data.pop("embeddings_strip") + else: + embeddings_strip = None + # compat with version 1 + if "use_tebd_bias" not in data: + data["use_tebd_bias"] = True + obj = cls(**data) + + obj.se_atten["davg"] = variables["davg"] + obj.se_atten["dstd"] = variables["dstd"] + obj.se_atten.embeddings = 
NetworkCollection.deserialize(embeddings) + if tebd_input_mode in ["strip"]: + obj.se_atten.embeddings_strip = NetworkCollection.deserialize( + embeddings_strip + ) + obj.type_embedding = TypeEmbedNet.deserialize(type_embedding) + obj.se_atten.dpa1_attention = NeighborGatedAttention.deserialize( + attention_layers + ) + return obj + + @classmethod + def update_sel( + cls, + train_data: DeepmdDataSystem, + type_map: Optional[list[str]], + local_jdata: dict, + ) -> tuple[dict, Optional[float]]: + """Update the selection and perform neighbor statistics. + + Parameters + ---------- + train_data : DeepmdDataSystem + data used to do neighbor statistics + type_map : list[str], optional + The name of each type of atoms + local_jdata : dict + The local data refer to the current class + + Returns + ------- + dict + The updated local data + float + The minimum distance between two atoms + """ + local_jdata_cpy = local_jdata.copy() + min_nbor_dist, sel = UpdateSel().update_one_sel( + train_data, type_map, local_jdata_cpy["rcut"], local_jdata_cpy["sel"], True + ) + local_jdata_cpy["sel"] = sel[0] + return local_jdata_cpy, min_nbor_dist + + +@DescriptorBlock.register("se_atten") +class DescrptBlockSeAtten(NativeOP, DescriptorBlock): + def __init__( + self, + rcut: float, + rcut_smth: float, + sel: Union[list[int], int], + ntypes: int, + neuron: list[int] = [25, 50, 100], + axis_neuron: int = 8, + tebd_dim: int = 8, + tebd_input_mode: str = "concat", + resnet_dt: bool = False, + type_one_side: bool = False, + attn: int = 128, + attn_layer: int = 2, + attn_dotr: bool = True, + attn_mask: bool = False, + exclude_types: list[tuple[int, int]] = [], + env_protection: float = 0.0, + set_davg_zero: bool = False, + activation_function: str = "tanh", + precision: str = DEFAULT_PRECISION, + scaling_factor=1.0, + normalize: bool = True, + temperature: Optional[float] = None, + trainable_ln: bool = True, + ln_eps: Optional[float] = 1e-5, + smooth: bool = True, + seed: Optional[Union[int, 
    def __init__(
        self,
        rcut: float,
        rcut_smth: float,
        sel: Union[list[int], int],
        ntypes: int,
        neuron: list[int] = [25, 50, 100],
        axis_neuron: int = 8,
        tebd_dim: int = 8,
        tebd_input_mode: str = "concat",
        resnet_dt: bool = False,
        type_one_side: bool = False,
        attn: int = 128,
        attn_layer: int = 2,
        attn_dotr: bool = True,
        attn_mask: bool = False,
        exclude_types: list[tuple[int, int]] = [],
        env_protection: float = 0.0,
        set_davg_zero: bool = False,
        activation_function: str = "tanh",
        precision: str = DEFAULT_PRECISION,
        scaling_factor=1.0,
        normalize: bool = True,
        temperature: Optional[float] = None,
        trainable_ln: bool = True,
        ln_eps: Optional[float] = 1e-5,
        smooth: bool = True,
        seed: Optional[Union[int, list[int]]] = None,
    ) -> None:
        """Store the hyper-parameters and build the sub-networks of the block.

        Builds the embedding network(s), the neighbor-gated attention layers,
        the environment matrix and the default statistics (zero mean, unit
        stddev). Child seeds 0, 1 and 2 derived from `seed` go to the
        embedding net, the strip embedding net and the attention layers.
        """
        self.rcut = rcut
        self.rcut_smth = rcut_smth
        # an integer `sel` is normalized to a one-element list
        if isinstance(sel, int):
            sel = [sel]
        self.sel = sel
        self.nnei = sum(sel)
        self.ntypes = ntypes
        self.neuron = neuron
        self.filter_neuron = self.neuron
        self.axis_neuron = axis_neuron
        self.tebd_dim = tebd_dim
        self.tebd_input_mode = tebd_input_mode
        self.resnet_dt = resnet_dt
        self.trainable_ln = trainable_ln
        self.ln_eps = ln_eps
        self.type_one_side = type_one_side
        self.attn = attn
        self.attn_layer = attn_layer
        self.attn_dotr = attn_dotr
        self.attn_mask = attn_mask
        self.exclude_types = exclude_types
        self.env_protection = env_protection
        self.set_davg_zero = set_davg_zero
        self.activation_function = activation_function
        self.precision = precision
        self.scaling_factor = scaling_factor
        self.normalize = normalize
        self.temperature = temperature
        self.smooth = smooth
        # order matters, placed after the assignment of self.ntypes
        self.reinit_exclude(exclude_types)

        # width of the type-embedding input: neighbor only (one side) or
        # neighbor + center (two sides)
        self.tebd_dim_input = self.tebd_dim if self.type_one_side else self.tebd_dim * 2
        if self.tebd_input_mode in ["concat"]:
            # radial scalar concatenated with the type embedding(s)
            self.embd_input_dim = 1 + self.tebd_dim_input
        else:
            # radial scalar only; type embedding goes through its own network
            self.embd_input_dim = 1
        embeddings = NetworkCollection(
            ndim=0,
            ntypes=self.ntypes,
            network_type="embedding_network",
        )
        embeddings[0] = EmbeddingNet(
            self.embd_input_dim,
            self.neuron,
            self.activation_function,
            self.resnet_dt,
            self.precision,
            seed=child_seed(seed, 0),
        )
        self.embeddings = embeddings
        if self.tebd_input_mode in ["strip"]:
            # separate embedding network fed with the type embedding alone
            embeddings_strip = NetworkCollection(
                ndim=0,
                ntypes=self.ntypes,
                network_type="embedding_network",
            )
            embeddings_strip[0] = EmbeddingNet(
                self.tebd_dim_input,
                self.neuron,
                self.activation_function,
                self.resnet_dt,
                self.precision,
                seed=child_seed(seed, 1),
            )
            self.embeddings_strip = embeddings_strip
        else:
            self.embeddings_strip = None
        self.dpa1_attention = NeighborGatedAttention(
            self.attn_layer,
            self.nnei,
            self.filter_neuron[-1],
            self.attn,
            dotr=self.attn_dotr,
            scaling_factor=self.scaling_factor,
            normalize=self.normalize,
            temperature=self.temperature,
            trainable_ln=self.trainable_ln,
            ln_eps=self.ln_eps,
            smooth=self.smooth,
            precision=self.precision,
            seed=child_seed(seed, 2),
        )

        # statistics are stored per type, per neighbor slot, for the 4
        # components of the environment matrix
        wanted_shape = (self.ntypes, self.nnei, 4)
        self.env_mat = EnvMat(self.rcut, self.rcut_smth, protection=self.env_protection)
        self.mean = np.zeros(wanted_shape, dtype=PRECISION_DICT[self.precision])
        self.stddev = np.ones(wanted_shape, dtype=PRECISION_DICT[self.precision])
        self.orig_sel = self.sel
requires a neighbor list that does not distinguish different atomic types. + + If false, the descriptor + 1. assumes total number of atoms of each atom type aligned across frames; + 2. requires a neighbor list that distinguishes different atomic types. + + """ + return True + + def get_env_protection(self) -> float: + """Returns the protection of building environment matrix.""" + return self.env_protection + + @property + def dim_out(self): + """Returns the output dimension of this descriptor.""" + return self.filter_neuron[-1] * self.axis_neuron + + @property + def dim_in(self): + """Returns the atomic input dimension of this descriptor.""" + return self.tebd_dim + + @property + def dim_emb(self): + """Returns the output dimension of embedding.""" + return self.get_dim_emb() + + def compute_input_stats( + self, + merged: Union[Callable[[], list[dict]], list[dict]], + path: Optional[DPPath] = None, + ) -> NoReturn: + """Compute the input statistics (e.g. mean and stddev) for the descriptors from packed data.""" + raise NotImplementedError + + def get_stats(self) -> NoReturn: + """Get the statistics of the descriptor.""" + raise NotImplementedError + + def reinit_exclude( + self, + exclude_types: list[tuple[int, int]] = [], + ) -> None: + self.exclude_types = exclude_types + self.emask = PairExcludeMask(self.ntypes, exclude_types=exclude_types) + + def cal_g( + self, + ss, + embedding_idx, + ): + xp = array_api_compat.array_namespace(ss) + nfnl, nnei = ss.shape[0:2] + shape2 = math.prod(ss.shape[2:]) + ss = xp.reshape(ss, (nfnl, nnei, shape2)) + # nfnl x nnei x ng + gg = self.embeddings[embedding_idx].call(ss) + return gg + + def cal_g_strip( + self, + ss, + embedding_idx, + ): + assert self.embeddings_strip is not None + # nfnl x nnei x ng + gg = self.embeddings_strip[embedding_idx].call(ss) + return gg + + def call( + self, + nlist: np.ndarray, + coord_ext: np.ndarray, + atype_ext: np.ndarray, + atype_embd_ext: Optional[np.ndarray] = None, + mapping: 
    def call(
        self,
        nlist: np.ndarray,
        coord_ext: np.ndarray,
        atype_ext: np.ndarray,
        atype_embd_ext: Optional[np.ndarray] = None,
        mapping: Optional[np.ndarray] = None,
        type_embedding: Optional[np.ndarray] = None,
    ):
        """Evaluate the attention-based descriptor block.

        Parameters
        ----------
        nlist
            The neighbor list; entries equal to -1 are treated as empty
            slots.
        coord_ext
            The extended coordinates, forwarded to the environment matrix.
        atype_ext
            The extended atom types.
        atype_embd_ext
            The type embedding of the extended atoms; read in "concat" mode.
        mapping
            Not used by this block.
        type_embedding
            The full type embedding table; required in "strip" mode.

        Returns
        -------
        tuple
            Five arrays, reshaped as follows:
            ``grrg`` (nf, nloc, ng * axis_neuron),
            ``gg`` (nf, nloc, nnei, ng),
            the last three components of the environment matrix
            (nf, nloc, nnei, 3),
            ``gr[..., 1:]`` (nf, nloc, ng, 3),
            and the switch function ``sw`` (nf, nloc, nnei, 1).

        Raises
        ------
        NotImplementedError
            If `tebd_input_mode` is neither "concat" nor "strip".
        """
        xp = array_api_compat.array_namespace(nlist, coord_ext, atype_ext)
        # nf x nloc x nnei x 4
        dmatrix, diff, sw = self.env_mat.call(
            coord_ext, atype_ext, nlist, self.mean, self.stddev
        )
        nf, nloc, nnei, _ = dmatrix.shape
        atype = atype_ext[:, :nloc]
        exclude_mask = self.emask.build_type_exclude_mask(nlist, atype_ext)
        # nfnl x nnei
        exclude_mask = xp.reshape(exclude_mask, (nf * nloc, nnei))
        # nfnl x nnei
        nlist = xp.reshape(nlist, (nf * nloc, nnei))
        # entries where the mask is falsy become empty slots (-1)
        nlist = xp.where(exclude_mask, nlist, xp.full_like(nlist, -1))
        # nfnl x nnei x 4
        dmatrix = xp.reshape(dmatrix, (nf * nloc, nnei, 4))
        # nfnl x nnei x 1
        sw = xp.reshape(sw, (nf * nloc, nnei, 1))
        # nfnl x nnei
        nlist_mask = nlist != -1
        # nfnl x nnei x 1
        sw = xp.where(nlist_mask[:, :, None], sw, xp.full_like(sw, 0.0))
        # empty slots are redirected to index 0 so they can be used for gathering
        nlist_masked = xp.where(nlist_mask, nlist, xp.zeros_like(nlist))
        ng = self.neuron[-1]
        nt = self.tebd_dim
        # nfnl x nnei x 4
        rr = xp.reshape(dmatrix, (nf * nloc, nnei, 4))
        rr = rr * xp.astype(exclude_mask[:, :, None], rr.dtype)
        # nfnl x nnei x 1
        ss = rr[..., 0:1]
        if self.tebd_input_mode in ["concat"]:
            # nfnl x tebd_dim
            atype_embd = xp.reshape(
                atype_embd_ext[:, :nloc, :], (nf * nloc, self.tebd_dim)
            )
            # nfnl x nnei x tebd_dim
            atype_embd_nnei = xp.tile(atype_embd[:, xp.newaxis, :], (1, nnei, 1))
            index = xp.tile(
                xp.reshape(nlist_masked, (nf, -1, 1)), (1, 1, self.tebd_dim)
            )
            # nfnl x nnei x tebd_dim
            atype_embd_nlist = xp_take_along_axis(atype_embd_ext, index, axis=1)
            atype_embd_nlist = xp.reshape(
                atype_embd_nlist, (nf * nloc, nnei, self.tebd_dim)
            )
            if not self.type_one_side:
                # nfnl x nnei x (1 + 2 * tebd_dim)
                ss = xp.concat([ss, atype_embd_nlist, atype_embd_nnei], axis=-1)
            else:
                # nfnl x nnei x (1 + tebd_dim)
                ss = xp.concat([ss, atype_embd_nlist], axis=-1)
            # calculate gg
            # nfnl x nnei x ng
            gg = self.cal_g(ss, 0)
        elif self.tebd_input_mode in ["strip"]:
            # nfnl x nnei x ng
            gg_s = self.cal_g(ss, 0)
            assert self.embeddings_strip is not None
            assert type_embedding is not None
            ntypes_with_padding = type_embedding.shape[0]
            # nf x (nl x nnei)
            nlist_index = xp.reshape(nlist_masked, (nf, nloc * nnei))
            # nf x (nl x nnei)
            nei_type = xp_take_along_axis(atype_ext, nlist_index, axis=1)
            # (nf x nl x nnei) x ng
            nei_type_index = xp.tile(xp.reshape(nei_type, (-1, 1)), (1, ng))
            if self.type_one_side:
                tt_full = self.cal_g_strip(type_embedding, 0)
                # (nf x nl x nnei) x ng
                gg_t = xp_take_along_axis(tt_full, nei_type_index, axis=0)
            else:
                # flat pair index: center_type * ntypes_with_padding + neighbor_type
                idx_i = xp.reshape(
                    xp.tile(
                        (xp.reshape(atype, (-1, 1)) * ntypes_with_padding), (1, nnei)
                    ),
                    (-1),
                )
                idx_j = xp.reshape(nei_type, (-1,))
                # (nf x nl x nnei) x ng
                idx = xp.tile(xp.reshape((idx_i + idx_j), (-1, 1)), (1, ng))
                # (ntypes) * ntypes * nt
                type_embedding_nei = xp.tile(
                    xp.reshape(type_embedding, (1, ntypes_with_padding, nt)),
                    (ntypes_with_padding, 1, 1),
                )
                # ntypes * (ntypes) * nt
                type_embedding_center = xp.tile(
                    xp.reshape(type_embedding, (ntypes_with_padding, 1, nt)),
                    (1, ntypes_with_padding, 1),
                )
                # (ntypes * ntypes) * (nt+nt)
                two_side_type_embedding = xp.reshape(
                    xp.concat([type_embedding_nei, type_embedding_center], axis=-1),
                    (-1, nt * 2),
                )
                tt_full = self.cal_g_strip(two_side_type_embedding, 0)
                # (nf x nl x nnei) x ng
                gg_t = xp_take_along_axis(tt_full, idx, axis=0)
            # (nf x nl) x nnei x ng
            gg_t = xp.reshape(gg_t, (nf * nloc, nnei, ng))
            if self.smooth:
                gg_t = gg_t * xp.reshape(sw, (-1, self.nnei, 1))
            # nfnl x nnei x ng
            gg = gg_s * gg_t + gg_s
        else:
            raise NotImplementedError

        normed = safe_for_vector_norm(
            xp.reshape(rr, (-1, nnei, 4))[:, :, 1:4], axis=-1, keepdims=True
        )
        # unit direction vectors; the norm is floored at 1e-12 before dividing
        input_r = xp.reshape(rr, (-1, nnei, 4))[:, :, 1:4] / xp.maximum(
            normed,
            xp.full_like(normed, 1e-12),
        )
        gg = self.dpa1_attention(
            gg, nlist_mask, input_r=input_r, sw=sw
        )  # shape is [nframes*nloc, self.nnei, out_size]
        # nfnl x ng x 4
        # gr = xp.einsum("lni,lnj->lij", gg, rr)
        gr = xp.sum(gg[:, :, :, None] * rr[:, :, None, :], axis=1)
        gr /= self.nnei
        gr1 = gr[:, : self.axis_neuron, :]
        # nfnl x ng x ng1
        # grrg = xp.einsum("lid,ljd->lij", gr, gr1)
        grrg = xp.sum(gr[:, :, None, :] * gr1[:, None, :, :], axis=3)
        # nf x nloc x (ng x ng1)
        grrg = xp.astype(
            xp.reshape(grrg, (nf, nloc, ng * self.axis_neuron)), coord_ext.dtype
        )
        return (
            xp.reshape(grrg, (nf, nloc, self.filter_neuron[-1] * self.axis_neuron)),
            xp.reshape(gg, (nf, nloc, self.nnei, self.filter_neuron[-1])),
            xp.reshape(dmatrix, (nf, nloc, self.nnei, 4))[..., 1:],
            xp.reshape(gr[..., 1:], (nf, nloc, self.filter_neuron[-1], 3)),
            xp.reshape(sw, (nf, nloc, nnei, 1)),
        )
"embeddings": obj.embeddings.serialize(), + "attention_layers": obj.dpa1_attention.serialize(), + "env_mat": obj.env_mat.serialize(), + "exclude_types": obj.exclude_types, + "env_protection": obj.env_protection, + "@variables": { + "davg": to_numpy_array(obj["davg"]), + "dstd": to_numpy_array(obj["dstd"]), + }, + } + if obj.tebd_input_mode in ["strip"]: + data.update({"embeddings_strip": obj.embeddings_strip.serialize()}) + return data + + @classmethod + def deserialize(cls, data: dict) -> "DescrptDPA1": + """Deserialize from dict.""" + data = data.copy() + check_version_compatibility(data.pop("@version"), 1, 1) + data.pop("@class") + data.pop("type") + variables = data.pop("@variables") + embeddings = data.pop("embeddings") + attention_layers = data.pop("attention_layers") + env_mat = data.pop("env_mat") + tebd_input_mode = data["tebd_input_mode"] + if tebd_input_mode in ["strip"]: + embeddings_strip = data.pop("embeddings_strip") + else: + embeddings_strip = None + obj = cls(**data) + + obj["davg"] = variables["davg"] + obj["dstd"] = variables["dstd"] + obj.embeddings = NetworkCollection.deserialize(embeddings) + if tebd_input_mode in ["strip"]: + obj.embeddings_strip = NetworkCollection.deserialize(embeddings_strip) + obj.dpa1_attention = NeighborGatedAttention.deserialize(attention_layers) + return obj + + +class NeighborGatedAttention(NativeOP): + def __init__( + self, + layer_num: int, + nnei: int, + embed_dim: int, + hidden_dim: int, + dotr: bool = False, + do_mask: bool = False, + scaling_factor: float = 1.0, + normalize: bool = True, + temperature: Optional[float] = None, + trainable_ln: bool = True, + ln_eps: float = 1e-5, + smooth: bool = True, + precision: str = DEFAULT_PRECISION, + seed: Optional[Union[int, list[int]]] = None, + ) -> None: + """Construct a neighbor-wise attention net.""" + super().__init__() + self.layer_num = layer_num + self.nnei = nnei + self.embed_dim = embed_dim + self.hidden_dim = hidden_dim + self.dotr = dotr + self.do_mask = 
do_mask + self.scaling_factor = scaling_factor + self.normalize = normalize + self.temperature = temperature + self.trainable_ln = trainable_ln + self.ln_eps = ln_eps + self.smooth = smooth + self.precision = precision + self.network_type = NeighborGatedAttentionLayer + + self.attention_layers = [ + NeighborGatedAttentionLayer( + nnei, + embed_dim, + hidden_dim, + dotr=dotr, + do_mask=do_mask, + scaling_factor=scaling_factor, + normalize=normalize, + temperature=temperature, + trainable_ln=trainable_ln, + ln_eps=ln_eps, + smooth=smooth, + precision=precision, + seed=child_seed(seed, ii), + ) + for ii in range(layer_num) + ] + + def call( + self, + input_G, + nei_mask, + input_r: Optional[np.ndarray] = None, + sw: Optional[np.ndarray] = None, + ): + out = input_G + for layer in self.attention_layers: + out = layer(out, nei_mask, input_r=input_r, sw=sw) + return out + + def __getitem__(self, key): + if isinstance(key, int): + return self.attention_layers[key] + else: + raise TypeError(key) + + def __setitem__(self, key, value) -> None: + if not isinstance(key, int): + raise TypeError(key) + if isinstance(value, self.network_type): + pass + elif isinstance(value, dict): + value = self.network_type.deserialize(value) + else: + raise TypeError(value) + self.attention_layers[key] = value + + def serialize(self): + """Serialize the networks to a dict. + + Returns + ------- + dict + The serialized networks. 
+ """ + return { + "@class": "NeighborGatedAttention", + "@version": 1, + "layer_num": self.layer_num, + "nnei": self.nnei, + "embed_dim": self.embed_dim, + "hidden_dim": self.hidden_dim, + "dotr": self.dotr, + "do_mask": self.do_mask, + "scaling_factor": self.scaling_factor, + "normalize": self.normalize, + "temperature": self.temperature, + "trainable_ln": self.trainable_ln, + "ln_eps": self.ln_eps, + "precision": self.precision, + "attention_layers": [layer.serialize() for layer in self.attention_layers], + } + + @classmethod + def deserialize(cls, data: dict) -> "NeighborGatedAttention": + """Deserialize the networks from a dict. + + Parameters + ---------- + data : dict + The dict to deserialize from. + """ + data = data.copy() + check_version_compatibility(data.pop("@version"), 1, 1) + data.pop("@class") + attention_layers = data.pop("attention_layers") + obj = cls(**data) + obj.attention_layers = [ + NeighborGatedAttentionLayer.deserialize(layer) for layer in attention_layers + ] + return obj + + +class NeighborGatedAttentionLayer(NativeOP): + def __init__( + self, + nnei: int, + embed_dim: int, + hidden_dim: int, + dotr: bool = False, + do_mask: bool = False, + scaling_factor: float = 1.0, + normalize: bool = True, + temperature: Optional[float] = None, + trainable_ln: bool = True, + ln_eps: float = 1e-5, + smooth: bool = True, + precision: str = DEFAULT_PRECISION, + seed: Optional[Union[int, list[int]]] = None, + ) -> None: + """Construct a neighbor-wise attention layer.""" + super().__init__() + self.nnei = nnei + self.embed_dim = embed_dim + self.hidden_dim = hidden_dim + self.dotr = dotr + self.do_mask = do_mask + self.scaling_factor = scaling_factor + self.normalize = normalize + self.temperature = temperature + self.trainable_ln = trainable_ln + self.ln_eps = ln_eps + self.precision = precision + self.attention_layer = GatedAttentionLayer( + nnei, + embed_dim, + hidden_dim, + dotr=dotr, + do_mask=do_mask, + scaling_factor=scaling_factor, + 
normalize=normalize, + temperature=temperature, + smooth=smooth, + precision=precision, + seed=child_seed(seed, 0), + ) + self.attn_layer_norm = LayerNorm( + self.embed_dim, + eps=ln_eps, + trainable=self.trainable_ln, + precision=precision, + seed=child_seed(seed, 1), + ) + + def call( + self, + x, + nei_mask, + input_r: Optional[np.ndarray] = None, + sw: Optional[np.ndarray] = None, + ): + residual = x + x, _ = self.attention_layer(x, nei_mask, input_r=input_r, sw=sw) + x = residual + x + x = self.attn_layer_norm(x) + return x + + def serialize(self) -> dict: + """Serialize the networks to a dict. + + Returns + ------- + dict + The serialized networks. + """ + return { + "nnei": self.nnei, + "embed_dim": self.embed_dim, + "hidden_dim": self.hidden_dim, + "dotr": self.dotr, + "do_mask": self.do_mask, + "scaling_factor": self.scaling_factor, + "normalize": self.normalize, + "temperature": self.temperature, + "trainable_ln": self.trainable_ln, + "ln_eps": self.ln_eps, + "precision": self.precision, + "attention_layer": self.attention_layer.serialize(), + "attn_layer_norm": self.attn_layer_norm.serialize(), + } + + @classmethod + def deserialize(cls, data) -> "NeighborGatedAttentionLayer": + """Deserialize the networks from a dict. + + Parameters + ---------- + data : dict + The dict to deserialize from. 
+ """ + data = data.copy() + attention_layer = data.pop("attention_layer") + attn_layer_norm = data.pop("attn_layer_norm") + obj = cls(**data) + obj.attention_layer = GatedAttentionLayer.deserialize(attention_layer) + obj.attn_layer_norm = LayerNorm.deserialize(attn_layer_norm) + return obj + + +class GatedAttentionLayer(NativeOP): + def __init__( + self, + nnei: int, + embed_dim: int, + hidden_dim: int, + num_heads: int = 1, + dotr: bool = False, + do_mask: bool = False, + scaling_factor: float = 1.0, + normalize: bool = True, + temperature: Optional[float] = None, + bias: bool = True, + smooth: bool = True, + precision: str = DEFAULT_PRECISION, + seed: Optional[Union[int, list[int]]] = None, + ) -> None: + """Construct a multi-head neighbor-wise attention net.""" + super().__init__() + assert hidden_dim % num_heads == 0, "hidden_dim must be divisible by num_heads" + self.nnei = nnei + self.embed_dim = embed_dim + self.hidden_dim = hidden_dim + self.num_heads = num_heads + self.head_dim = hidden_dim // num_heads + self.dotr = dotr + self.do_mask = do_mask + self.bias = bias + self.smooth = smooth + self.scaling_factor = scaling_factor + self.temperature = temperature + self.precision = precision + self.scaling = ( + (self.head_dim * scaling_factor) ** -0.5 + if temperature is None + else temperature + ) + self.normalize = normalize + self.in_proj = NativeLayer( + embed_dim, + hidden_dim * 3, + bias=bias, + use_timestep=False, + precision=precision, + seed=child_seed(seed, 0), + ) + self.out_proj = NativeLayer( + hidden_dim, + embed_dim, + bias=bias, + use_timestep=False, + precision=precision, + seed=child_seed(seed, 1), + ) + + def call(self, query, nei_mask, input_r=None, sw=None, attnw_shift=20.0): + xp = array_api_compat.array_namespace(query, nei_mask) + # Linear projection + # q, k, v = xp.split(self.in_proj(query), 3, axis=-1) + _query = self.in_proj(query) + q = _query[..., 0 : self.head_dim] + k = _query[..., self.head_dim : self.head_dim * 2] + v = 
_query[..., self.head_dim * 2 : self.head_dim * 3] + # Reshape and normalize + # (nf x nloc) x num_heads x nnei x head_dim + q = xp.permute_dims( + xp.reshape(q, (-1, self.nnei, self.num_heads, self.head_dim)), (0, 2, 1, 3) + ) + k = xp.permute_dims( + xp.reshape(k, (-1, self.nnei, self.num_heads, self.head_dim)), (0, 2, 1, 3) + ) + v = xp.permute_dims( + xp.reshape(v, (-1, self.nnei, self.num_heads, self.head_dim)), (0, 2, 1, 3) + ) + if self.normalize: + q = np_normalize(q, axis=-1) + k = np_normalize(k, axis=-1) + v = np_normalize(v, axis=-1) + q = q * self.scaling + # Attention weights + # (nf x nloc) x num_heads x nnei x nnei + attn_weights = q @ xp.permute_dims(k, (0, 1, 3, 2)) + nei_mask = xp.reshape(nei_mask, (-1, self.nnei)) + if self.smooth: + sw = xp.reshape(sw, (-1, 1, self.nnei)) + attn_weights = (attn_weights + attnw_shift) * sw[:, :, :, None] * sw[ + :, :, None, : + ] - attnw_shift + else: + attn_weights = xp.where( + nei_mask[:, None, None, :], + attn_weights, + xp.full_like(attn_weights, -xp.inf), + ) + attn_weights = np_softmax(attn_weights, axis=-1) + attn_weights = xp.where( + nei_mask[:, None, :, None], attn_weights, xp.zeros_like(attn_weights) + ) + if self.smooth: + attn_weights = attn_weights * sw[:, :, :, None] * sw[:, :, None, :] + if self.dotr: + angular_weight = xp.reshape( + input_r @ xp.permute_dims(input_r, (0, 2, 1)), + (-1, 1, self.nnei, self.nnei), + ) + attn_weights = attn_weights * angular_weight + # Output projection + # (nf x nloc) x num_heads x nnei x head_dim + o = attn_weights @ v + # (nf x nloc) x nnei x (num_heads x head_dim) + o = xp.reshape( + xp.permute_dims(o, (0, 2, 1, 3)), (-1, self.nnei, self.hidden_dim) + ) + output = self.out_proj(o) + return output, attn_weights + + def serialize(self): + return { + "nnei": self.nnei, + "embed_dim": self.embed_dim, + "hidden_dim": self.hidden_dim, + "num_heads": self.num_heads, + "dotr": self.dotr, + "do_mask": self.do_mask, + "scaling_factor": self.scaling_factor, + "normalize": 
self.normalize, + "temperature": self.temperature, + "bias": self.bias, + "smooth": self.smooth, + "precision": self.precision, + "in_proj": self.in_proj.serialize(), + "out_proj": self.out_proj.serialize(), + } + + @classmethod + def deserialize(cls, data): + data = data.copy() + in_proj = data.pop("in_proj") + out_proj = data.pop("out_proj") + obj = cls(**data) + obj.in_proj = NativeLayer.deserialize(in_proj) + obj.out_proj = NativeLayer.deserialize(out_proj) + return obj diff --git a/deepmd/dpmodel/descriptor/dpa2.py b/deepmd/dpmodel/descriptor/dpa2.py new file mode 100644 index 0000000000..e4cadb7b36 --- /dev/null +++ b/deepmd/dpmodel/descriptor/dpa2.py @@ -0,0 +1,1085 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + NoReturn, + Optional, + Union, +) + +import array_api_compat +import numpy as np + +from deepmd.dpmodel import ( + NativeOP, +) +from deepmd.dpmodel.array_api import ( + xp_take_along_axis, +) +from deepmd.dpmodel.common import ( + cast_precision, + to_numpy_array, +) +from deepmd.dpmodel.utils import ( + EnvMat, + NetworkCollection, +) +from deepmd.dpmodel.utils.network import ( + Identity, + NativeLayer, +) +from deepmd.dpmodel.utils.nlist import ( + build_multiple_neighbor_list, + get_multiple_nlist_key, +) +from deepmd.dpmodel.utils.seed import ( + child_seed, +) +from deepmd.dpmodel.utils.type_embed import ( + TypeEmbedNet, +) +from deepmd.dpmodel.utils.update_sel import ( + UpdateSel, +) +from deepmd.utils.data_system import ( + DeepmdDataSystem, +) +from deepmd.utils.finetune import ( + get_index_between_two_maps, + map_pair_exclude_types, +) +from deepmd.utils.path import ( + DPPath, +) +from deepmd.utils.version import ( + check_version_compatibility, +) + +from .base_descriptor import ( + BaseDescriptor, +) +from .descriptor import ( + extend_descrpt_stat, +) +from .dpa1 import ( + DescrptBlockSeAtten, +) +from .repformers import ( + DescrptBlockRepformers, + RepformerLayer, +) +from .se_t_tebd import ( + 
class RepinitArgs:
    def __init__(
        self,
        rcut: float,
        rcut_smth: float,
        nsel: int,
        neuron: list[int] = [25, 50, 100],
        axis_neuron: int = 16,
        tebd_dim: int = 8,
        tebd_input_mode: str = "concat",
        set_davg_zero: bool = True,
        activation_function="tanh",
        resnet_dt: bool = False,
        type_one_side: bool = False,
        use_three_body: bool = False,
        three_body_neuron: list[int] = [2, 4, 8],
        three_body_sel: int = 40,
        three_body_rcut: float = 4.0,
        three_body_rcut_smth: float = 0.5,
    ) -> None:
        r"""The constructor for the RepinitArgs class which defines the parameters of the repinit block in DPA2 descriptor.

        Parameters
        ----------
        rcut : float
            The cut-off radius.
        rcut_smth : float
            Where to start smoothing. For example the 1/r term is smoothed from rcut to rcut_smth.
        nsel : int
            Maximally possible number of selected neighbors.
        neuron : list, optional
            Number of neurons in each hidden layers of the embedding net.
            When two layers are of the same size or one layer is twice as large as the previous layer,
            a skip connection is built.
            The list is copied on construction.
        axis_neuron : int, optional
            Size of the submatrix of G (embedding matrix).
        tebd_dim : int, optional
            The dimension of atom type embedding.
        tebd_input_mode : str, optional
            The input mode of the type embedding. Supported modes are ['concat', 'strip'].
        set_davg_zero : bool, optional
            Set the normalization average to zero.
        activation_function : str, optional
            The activation function in the embedding net.
        resnet_dt : bool, optional
            Whether to use a "Timestep" in the skip connection.
        type_one_side : bool, optional
            Whether to use one-side type embedding.
        use_three_body : bool, optional
            Whether to concatenate three-body representation in the output descriptor.
        three_body_neuron : list, optional
            Number of neurons in each hidden layers of the three-body embedding net.
            When two layers are of the same size or one layer is twice as large as the previous layer,
            a skip connection is built.
            The list is copied on construction.
        three_body_sel : int, optional
            Maximally possible number of selected neighbors in the three-body representation.
        three_body_rcut : float, optional
            The cut-off radius in the three-body representation.
        three_body_rcut_smth : float, optional
            Where to start smoothing in the three-body representation.
            For example the 1/r term is smoothed from three_body_rcut to three_body_rcut_smth.
        """
        self.rcut = rcut
        self.rcut_smth = rcut_smth
        self.nsel = nsel
        # Copy the list arguments: the defaults are single shared objects, so
        # keeping a reference would let a mutation on one instance leak into
        # every instance created afterwards (and into the caller's own list).
        self.neuron = list(neuron)
        self.axis_neuron = axis_neuron
        self.tebd_dim = tebd_dim
        self.tebd_input_mode = tebd_input_mode
        self.set_davg_zero = set_davg_zero
        self.activation_function = activation_function
        self.resnet_dt = resnet_dt
        self.type_one_side = type_one_side
        self.use_three_body = use_three_body
        self.three_body_neuron = list(three_body_neuron)
        self.three_body_sel = three_body_sel
        self.three_body_rcut = three_body_rcut
        self.three_body_rcut_smth = three_body_rcut_smth

    def __getitem__(self, key):
        """Dict-style read access to an attribute; raise KeyError if it does not exist."""
        if hasattr(self, key):
            return getattr(self, key)
        else:
            raise KeyError(key)

    def serialize(self) -> dict:
        """Return the constructor arguments as a dict accepted by `deserialize`."""
        return {
            "rcut": self.rcut,
            "rcut_smth": self.rcut_smth,
            "nsel": self.nsel,
            "neuron": self.neuron,
            "axis_neuron": self.axis_neuron,
            "tebd_dim": self.tebd_dim,
            "tebd_input_mode": self.tebd_input_mode,
            "set_davg_zero": self.set_davg_zero,
            "activation_function": self.activation_function,
            "resnet_dt": self.resnet_dt,
            "type_one_side": self.type_one_side,
            "use_three_body": self.use_three_body,
            "three_body_neuron": self.three_body_neuron,
            "three_body_sel": self.three_body_sel,
            "three_body_rcut": self.three_body_rcut,
            "three_body_rcut_smth": self.three_body_rcut_smth,
        }

    @classmethod
    def deserialize(cls, data: dict) -> "RepinitArgs":
        """Build an instance from a dict produced by `serialize`."""
        return cls(**data)


class RepformerArgs:
    def __init__(
        self,
        rcut: float,
        rcut_smth: float,
        nsel: int,
        nlayers: int = 3,
        g1_dim: int = 128,
        g2_dim: int = 16,
        axis_neuron: int = 4,
        direct_dist: bool = False,
        update_g1_has_conv: bool = True,
        update_g1_has_drrd: bool = True,
        update_g1_has_grrg: bool = True,
        update_g1_has_attn: bool = True,
        update_g2_has_g1g1: bool = True,
        update_g2_has_attn: bool = True,
        update_h2: bool = False,
        attn1_hidden: int = 64,
        attn1_nhead: int = 4,
        attn2_hidden: int = 16,
        attn2_nhead: int = 4,
        attn2_has_gate: bool = False,
        activation_function: str = "tanh",
        update_style: str = "res_avg",
        update_residual: float = 0.001,
        update_residual_init: str = "norm",
        set_davg_zero: bool = True,
        trainable_ln: bool = True,
        use_sqrt_nnei: bool = True,
        g1_out_conv: bool = True,
        g1_out_mlp: bool = True,
        ln_eps: Optional[float] = 1e-5,
    ) -> None:
        r"""The constructor for the RepformerArgs class which defines the parameters of the repformer block in DPA2 descriptor.

        Parameters
        ----------
        rcut : float
            The cut-off radius.
        rcut_smth : float
            Where to start smoothing. For example the 1/r term is smoothed from rcut to rcut_smth.
        nsel : int
            Maximally possible number of selected neighbors.
        nlayers : int, optional
            Number of repformer layers.
        g1_dim : int, optional
            Dimension of the first graph convolution layer.
        g2_dim : int, optional
            Dimension of the second graph convolution layer.
        axis_neuron : int, optional
            Size of the submatrix of G (embedding matrix).
        direct_dist : bool, optional
            Whether to use direct distance information (1/r term) in the repformer block.
        update_g1_has_conv : bool, optional
            Whether to update the g1 rep with convolution term.
        update_g1_has_drrd : bool, optional
            Whether to update the g1 rep with the drrd term.
        update_g1_has_grrg : bool, optional
            Whether to update the g1 rep with the grrg term.
        update_g1_has_attn : bool, optional
            Whether to update the g1 rep with the localized self-attention.
        update_g2_has_g1g1 : bool, optional
            Whether to update the g2 rep with the g1xg1 term.
        update_g2_has_attn : bool, optional
            Whether to update the g2 rep with the gated self-attention.
        update_h2 : bool, optional
            Whether to update the h2 rep.
        attn1_hidden : int, optional
            The hidden dimension of localized self-attention to update the g1 rep.
        attn1_nhead : int, optional
            The number of heads in localized self-attention to update the g1 rep.
        attn2_hidden : int, optional
            The hidden dimension of gated self-attention to update the g2 rep.
        attn2_nhead : int, optional
            The number of heads in gated self-attention to update the g2 rep.
        attn2_has_gate : bool, optional
            Whether to use gate in the gated self-attention to update the g2 rep.
        activation_function : str, optional
            The activation function in the embedding net.
        update_style : str, optional
            Style to update a representation.
            Supported options are:
            -'res_avg': Updates a rep `u` with: u = 1/\\sqrt{n+1} (u + u_1 + u_2 + ... + u_n)
            -'res_incr': Updates a rep `u` with: u = u + 1/\\sqrt{n} (u_1 + u_2 + ... + u_n)
            -'res_residual': Updates a rep `u` with: u = u + (r1*u_1 + r2*u_2 + ... + r3*u_n)
            where `r1`, `r2` ... `r3` are residual weights defined by `update_residual`
            and `update_residual_init`.
        update_residual : float, optional
            When update using residual mode, the initial std of residual vector weights.
        update_residual_init : str, optional
            When update using residual mode, the initialization mode of residual vector weights.
        set_davg_zero : bool, optional
            Set the normalization average to zero.
        trainable_ln : bool, optional
            Whether to use trainable shift and scale weights in layer normalization.
        use_sqrt_nnei : bool, optional
            Whether to use the square root of the number of neighbors for symmetrization_op normalization instead of using the number of neighbors directly.
        g1_out_conv : bool, optional
            Whether to put the convolutional update of g1 separately outside the concatenated MLP update.
        g1_out_mlp : bool, optional
            Whether to put the self MLP update of g1 separately outside the concatenated MLP update.
        ln_eps : float, optional
            The epsilon value for layer normalization.
            ``None`` is replaced by the default ``1e-5``.
        """
        self.rcut = rcut
        self.rcut_smth = rcut_smth
        self.nsel = nsel
        self.nlayers = nlayers
        self.g1_dim = g1_dim
        self.g2_dim = g2_dim
        self.axis_neuron = axis_neuron
        self.direct_dist = direct_dist
        self.update_g1_has_conv = update_g1_has_conv
        self.update_g1_has_drrd = update_g1_has_drrd
        self.update_g1_has_grrg = update_g1_has_grrg
        self.update_g1_has_attn = update_g1_has_attn
        self.update_g2_has_g1g1 = update_g2_has_g1g1
        self.update_g2_has_attn = update_g2_has_attn
        self.update_h2 = update_h2
        self.attn1_hidden = attn1_hidden
        self.attn1_nhead = attn1_nhead
        self.attn2_hidden = attn2_hidden
        self.attn2_nhead = attn2_nhead
        self.attn2_has_gate = attn2_has_gate
        self.activation_function = activation_function
        self.update_style = update_style
        self.update_residual = update_residual
        self.update_residual_init = update_residual_init
        self.set_davg_zero = set_davg_zero
        self.trainable_ln = trainable_ln
        self.use_sqrt_nnei = use_sqrt_nnei
        self.g1_out_conv = g1_out_conv
        self.g1_out_mlp = g1_out_mlp
        # an explicit None falls back to the default value used by this backend
        if ln_eps is None:
            ln_eps = 1e-5
        self.ln_eps = ln_eps

    def __getitem__(self, key):
        """Dict-style read access to an attribute; raise KeyError if it does not exist."""
        if hasattr(self, key):
            return getattr(self, key)
        else:
            raise KeyError(key)

    def serialize(self) -> dict:
        """Return the constructor arguments as a dict accepted by `deserialize`."""
        return {
            "rcut": self.rcut,
            "rcut_smth": self.rcut_smth,
            "nsel": self.nsel,
            "nlayers": self.nlayers,
            "g1_dim": self.g1_dim,
            "g2_dim": self.g2_dim,
            "axis_neuron": self.axis_neuron,
            "direct_dist": self.direct_dist,
            "update_g1_has_conv": self.update_g1_has_conv,
            "update_g1_has_drrd": self.update_g1_has_drrd,
            "update_g1_has_grrg": self.update_g1_has_grrg,
            "update_g1_has_attn": self.update_g1_has_attn,
            "update_g2_has_g1g1": self.update_g2_has_g1g1,
            "update_g2_has_attn": self.update_g2_has_attn,
            "update_h2": self.update_h2,
            "attn1_hidden": self.attn1_hidden,
            "attn1_nhead": self.attn1_nhead,
            "attn2_hidden": self.attn2_hidden,
            "attn2_nhead": self.attn2_nhead,
            "attn2_has_gate": self.attn2_has_gate,
            "activation_function": self.activation_function,
            "update_style": self.update_style,
            "update_residual": self.update_residual,
            "update_residual_init": self.update_residual_init,
            "set_davg_zero": self.set_davg_zero,
            "trainable_ln": self.trainable_ln,
            "use_sqrt_nnei": self.use_sqrt_nnei,
            "g1_out_conv": self.g1_out_conv,
            "g1_out_mlp": self.g1_out_mlp,
            "ln_eps": self.ln_eps,
        }

    @classmethod
    def deserialize(cls, data: dict) -> "RepformerArgs":
        """Build an instance from a dict produced by `serialize`."""
        return cls(**data)
+ concat_output_tebd : bool, optional + Whether to concat type embedding at the output of the descriptor. + precision : str, optional + The precision of the embedding net parameters. + smooth : bool, optional + Whether to use smoothness in processes such as attention weights calculation. + exclude_types : list[list[int]], optional + The excluded pairs of types which have no interaction with each other. + For example, `[[0, 1]]` means no interaction between type 0 and type 1. + env_protection : float, optional + Protection parameter to prevent division by zero errors during environment matrix calculations. + For example, when using paddings, there may be zero distances of neighbors, which may make division by zero error during environment matrix calculations without protection. + trainable : bool, optional + If the parameters are trainable. + seed : int, optional + (Unused yet) Random seed for parameter initialization. + add_tebd_to_repinit_out : bool, optional + Whether to add type embedding to the output representation from repinit before inputting it into repformer. + use_econf_tebd : bool, Optional + Whether to use electronic configuration type embedding. + use_tebd_bias : bool, Optional + Whether to use bias in the type embedding layer. + type_map : list[str], Optional + A list of strings. Give the name to each type of atoms. + + Returns + ------- + descriptor: torch.Tensor + the descriptor of shape nf x nloc x g1_dim. + invariant single-atom representation. + g2: torch.Tensor + invariant pair-atom representation. + h2: torch.Tensor + equivariant pair-atom representation. + rot_mat: torch.Tensor + rotation matrix for equivariant fittings + sw: torch.Tensor + The switch function for decaying inverse distance. 
+ + """ + + def init_subclass_params(sub_data, sub_class): + if isinstance(sub_data, dict): + return sub_class(**sub_data) + elif isinstance(sub_data, sub_class): + return sub_data + else: + raise ValueError( + f"Input args must be a {sub_class.__name__} class or a dict!" + ) + + self.repinit_args = init_subclass_params(repinit, RepinitArgs) + self.repformer_args = init_subclass_params(repformer, RepformerArgs) + + self.repinit = DescrptBlockSeAtten( + self.repinit_args.rcut, + self.repinit_args.rcut_smth, + self.repinit_args.nsel, + ntypes, + attn_layer=0, + neuron=self.repinit_args.neuron, + axis_neuron=self.repinit_args.axis_neuron, + tebd_dim=self.repinit_args.tebd_dim, + tebd_input_mode=self.repinit_args.tebd_input_mode, + set_davg_zero=self.repinit_args.set_davg_zero, + exclude_types=exclude_types, + env_protection=env_protection, + activation_function=self.repinit_args.activation_function, + precision=precision, + resnet_dt=self.repinit_args.resnet_dt, + smooth=smooth, + type_one_side=self.repinit_args.type_one_side, + seed=child_seed(seed, 0), + ) + self.use_three_body = self.repinit_args.use_three_body + if self.use_three_body: + self.repinit_three_body = DescrptBlockSeTTebd( + self.repinit_args.three_body_rcut, + self.repinit_args.three_body_rcut_smth, + self.repinit_args.three_body_sel, + ntypes, + neuron=self.repinit_args.three_body_neuron, + tebd_dim=self.repinit_args.tebd_dim, + tebd_input_mode=self.repinit_args.tebd_input_mode, + set_davg_zero=self.repinit_args.set_davg_zero, + exclude_types=exclude_types, + env_protection=env_protection, + activation_function=self.repinit_args.activation_function, + precision=precision, + resnet_dt=self.repinit_args.resnet_dt, + smooth=smooth, + seed=child_seed(seed, 5), + ) + else: + self.repinit_three_body = None + self.repformers = DescrptBlockRepformers( + self.repformer_args.rcut, + self.repformer_args.rcut_smth, + self.repformer_args.nsel, + ntypes, + nlayers=self.repformer_args.nlayers, + 
g1_dim=self.repformer_args.g1_dim, + g2_dim=self.repformer_args.g2_dim, + axis_neuron=self.repformer_args.axis_neuron, + direct_dist=self.repformer_args.direct_dist, + update_g1_has_conv=self.repformer_args.update_g1_has_conv, + update_g1_has_drrd=self.repformer_args.update_g1_has_drrd, + update_g1_has_grrg=self.repformer_args.update_g1_has_grrg, + update_g1_has_attn=self.repformer_args.update_g1_has_attn, + update_g2_has_g1g1=self.repformer_args.update_g2_has_g1g1, + update_g2_has_attn=self.repformer_args.update_g2_has_attn, + update_h2=self.repformer_args.update_h2, + attn1_hidden=self.repformer_args.attn1_hidden, + attn1_nhead=self.repformer_args.attn1_nhead, + attn2_hidden=self.repformer_args.attn2_hidden, + attn2_nhead=self.repformer_args.attn2_nhead, + attn2_has_gate=self.repformer_args.attn2_has_gate, + activation_function=self.repformer_args.activation_function, + update_style=self.repformer_args.update_style, + update_residual=self.repformer_args.update_residual, + update_residual_init=self.repformer_args.update_residual_init, + set_davg_zero=self.repformer_args.set_davg_zero, + smooth=smooth, + exclude_types=exclude_types, + env_protection=env_protection, + precision=precision, + trainable_ln=self.repformer_args.trainable_ln, + use_sqrt_nnei=self.repformer_args.use_sqrt_nnei, + g1_out_conv=self.repformer_args.g1_out_conv, + g1_out_mlp=self.repformer_args.g1_out_mlp, + ln_eps=self.repformer_args.ln_eps, + seed=child_seed(seed, 1), + ) + self.rcsl_list = [ + (self.repformers.get_rcut(), self.repformers.get_nsel()), + (self.repinit.get_rcut(), self.repinit.get_nsel()), + ] + if self.use_three_body: + self.rcsl_list.append( + (self.repinit_three_body.get_rcut(), self.repinit_three_body.get_nsel()) + ) + self.rcsl_list.sort() + for ii in range(1, len(self.rcsl_list)): + assert ( + self.rcsl_list[ii - 1][1] <= self.rcsl_list[ii][1] + ), "rcut and sel are not in the same order" + self.rcut_list = [ii[0] for ii in self.rcsl_list] + self.nsel_list = [ii[1] for ii 
in self.rcsl_list] + self.use_econf_tebd = use_econf_tebd + self.use_tebd_bias = use_tebd_bias + self.type_map = type_map + self.type_embedding = TypeEmbedNet( + ntypes=ntypes, + neuron=[self.repinit_args.tebd_dim], + padding=True, + activation_function="Linear", + precision=precision, + use_econf_tebd=use_econf_tebd, + use_tebd_bias=use_tebd_bias, + type_map=type_map, + seed=child_seed(seed, 2), + ) + self.concat_output_tebd = concat_output_tebd + self.precision = precision + self.smooth = smooth + self.exclude_types = exclude_types + self.env_protection = env_protection + self.trainable = trainable + self.add_tebd_to_repinit_out = add_tebd_to_repinit_out + + self.repinit_out_dim = self.repinit.dim_out + if self.repinit_args.use_three_body: + assert self.repinit_three_body is not None + self.repinit_out_dim += self.repinit_three_body.dim_out + + if self.repinit_out_dim == self.repformers.dim_in: + self.g1_shape_tranform = Identity() + else: + self.g1_shape_tranform = NativeLayer( + self.repinit_out_dim, + self.repformers.dim_in, + bias=False, + precision=precision, + seed=child_seed(seed, 3), + ) + self.tebd_transform = None + if self.add_tebd_to_repinit_out: + self.tebd_transform = NativeLayer( + self.repinit_args.tebd_dim, + self.repformers.dim_in, + bias=False, + precision=precision, + seed=child_seed(seed, 4), + ) + assert self.repinit.rcut > self.repformers.rcut + assert self.repinit.sel[0] > self.repformers.sel[0] + + self.tebd_dim = self.repinit_args.tebd_dim + self.rcut = self.repinit.get_rcut() + self.ntypes = ntypes + self.sel = self.repinit.sel + self.precision = precision + + def get_rcut(self) -> float: + """Returns the cut-off radius.""" + return self.rcut + + def get_rcut_smth(self) -> float: + """Returns the radius where the neighbor information starts to smoothly decay to 0.""" + return self.rcut_smth + + def get_nsel(self) -> int: + """Returns the number of selected atoms in the cut-off radius.""" + return sum(self.sel) + + def get_sel(self) -> 
list[int]: + """Returns the number of selected atoms for each type.""" + return self.sel + + def get_ntypes(self) -> int: + """Returns the number of element types.""" + return self.ntypes + + def get_type_map(self) -> list[str]: + """Get the name to each type of atoms.""" + return self.type_map + + def get_dim_out(self) -> int: + """Returns the output dimension of this descriptor.""" + ret = self.repformers.dim_out + if self.concat_output_tebd: + ret += self.tebd_dim + return ret + + def get_dim_emb(self) -> int: + """Returns the embedding dimension of this descriptor.""" + return self.repformers.dim_emb + + def mixed_types(self) -> bool: + """If true, the descriptor + 1. assumes total number of atoms aligned across frames; + 2. requires a neighbor list that does not distinguish different atomic types. + + If false, the descriptor + 1. assumes total number of atoms of each atom type aligned across frames; + 2. requires a neighbor list that distinguishes different atomic types. + + """ + return True + + def has_message_passing(self) -> bool: + """Returns whether the descriptor has message passing.""" + return any( + [self.repinit.has_message_passing(), self.repformers.has_message_passing()] + ) + + def need_sorted_nlist_for_lower(self) -> bool: + """Returns whether the descriptor needs sorted nlist when using `forward_lower`.""" + return True + + def get_env_protection(self) -> float: + """Returns the protection of building environment matrix.""" + return self.env_protection + + def share_params(self, base_class, shared_level, resume=False) -> NoReturn: + """ + Share the parameters of self to the base_class with shared_level during multitask training. + If not start from checkpoint (resume is False), + some separated parameters (e.g. mean and stddev) will be re-calculated across different classes. 
+ """ + raise NotImplementedError + + def change_type_map( + self, type_map: list[str], model_with_new_type_stat=None + ) -> None: + """Change the type related params to new ones, according to `type_map` and the original one in the model. + If there are new types in `type_map`, statistics will be updated accordingly to `model_with_new_type_stat` for these new types. + """ + assert ( + self.type_map is not None + ), "'type_map' must be defined when performing type changing!" + remap_index, has_new_type = get_index_between_two_maps(self.type_map, type_map) + self.type_map = type_map + self.type_embedding.change_type_map(type_map=type_map) + self.exclude_types = map_pair_exclude_types(self.exclude_types, remap_index) + self.ntypes = len(type_map) + repinit = self.repinit + repformers = self.repformers + repinit_three_body = self.repinit_three_body + if has_new_type: + # the avg and std of new types need to be updated + extend_descrpt_stat( + repinit, + type_map, + des_with_stat=model_with_new_type_stat.repinit + if model_with_new_type_stat is not None + else None, + ) + extend_descrpt_stat( + repformers, + type_map, + des_with_stat=model_with_new_type_stat.repformers + if model_with_new_type_stat is not None + else None, + ) + if self.use_three_body: + extend_descrpt_stat( + repinit_three_body, + type_map, + des_with_stat=model_with_new_type_stat.repinit_three_body + if model_with_new_type_stat is not None + else None, + ) + repinit.ntypes = self.ntypes + repformers.ntypes = self.ntypes + repinit.reinit_exclude(self.exclude_types) + repformers.reinit_exclude(self.exclude_types) + repinit["davg"] = repinit["davg"][remap_index] + repinit["dstd"] = repinit["dstd"][remap_index] + repformers["davg"] = repformers["davg"][remap_index] + repformers["dstd"] = repformers["dstd"][remap_index] + if self.use_three_body: + repinit_three_body.ntypes = self.ntypes + repinit_three_body.reinit_exclude(self.exclude_types) + repinit_three_body["davg"] = 
repinit_three_body["davg"][remap_index] + repinit_three_body["dstd"] = repinit_three_body["dstd"][remap_index] + + @property + def dim_out(self): + return self.get_dim_out() + + @property + def dim_emb(self): + """Returns the embedding dimension g2.""" + return self.get_dim_emb() + + def compute_input_stats( + self, merged: list[dict], path: Optional[DPPath] = None + ) -> NoReturn: + """Update mean and stddev for descriptor elements.""" + raise NotImplementedError + + def set_stat_mean_and_stddev( + self, + mean: list[np.ndarray], + stddev: list[np.ndarray], + ) -> None: + """Update mean and stddev for descriptor.""" + descrpt_list = [self.repinit, self.repformers] + if self.use_three_body: + descrpt_list.append(self.repinit_three_body) + for ii, descrpt in enumerate(descrpt_list): + descrpt.mean = mean[ii] + descrpt.stddev = stddev[ii] + + def get_stat_mean_and_stddev(self) -> tuple[list[np.ndarray], list[np.ndarray]]: + """Get mean and stddev for descriptor.""" + mean_list = [self.repinit.mean, self.repformers.mean] + stddev_list = [ + self.repinit.stddev, + self.repformers.stddev, + ] + if self.use_three_body: + mean_list.append(self.repinit_three_body.mean) + stddev_list.append(self.repinit_three_body.stddev) + return mean_list, stddev_list + + @cast_precision + def call( + self, + coord_ext: np.ndarray, + atype_ext: np.ndarray, + nlist: np.ndarray, + mapping: Optional[np.ndarray] = None, + ): + """Compute the descriptor. + + Parameters + ---------- + coord_ext + The extended coordinates of atoms. shape: nf x (nallx3) + atype_ext + The extended atom types. shape: nf x nall + nlist + The neighbor list. shape: nf x nloc x nnei + mapping + The index mapping, maps extended region index to local region. + + Returns + ------- + descriptor + The descriptor. shape: nf x nloc x (ng x axis_neuron) + gr + The rotationally equivariant and permutationally invariant single particle + representation.
shape: nf x nloc x ng x 3 + g2 + The rotationally invariant pair-particle representation. + shape: nf x nloc x nnei x ng + h2 + The rotationally equivariant pair-particle representation. + shape: nf x nloc x nnei x 3 + sw + The smooth switch function. shape: nf x nloc x nnei + + """ + xp = array_api_compat.array_namespace(coord_ext, atype_ext, nlist) + use_three_body = self.use_three_body + nframes, nloc, nnei = nlist.shape + nall = xp.reshape(coord_ext, (nframes, -1)).shape[1] // 3 + # nlists + nlist_dict = build_multiple_neighbor_list( + coord_ext, + nlist, + self.rcut_list, + self.nsel_list, + ) + type_embedding = self.type_embedding.call() + # repinit + g1_ext = xp.reshape( + xp.take(type_embedding, xp.reshape(atype_ext, [-1]), axis=0), + (nframes, nall, self.tebd_dim), + ) + g1_inp = g1_ext[:, :nloc, :] + g1, _, _, _, _ = self.repinit( + nlist_dict[ + get_multiple_nlist_key(self.repinit.get_rcut(), self.repinit.get_nsel()) + ], + coord_ext, + atype_ext, + g1_ext, + mapping, + type_embedding=type_embedding, + ) + if use_three_body: + assert self.repinit_three_body is not None + g1_three_body, __, __, __, __ = self.repinit_three_body( + nlist_dict[ + get_multiple_nlist_key( + self.repinit_three_body.get_rcut(), + self.repinit_three_body.get_nsel(), + ) + ], + coord_ext, + atype_ext, + g1_ext, + mapping, + type_embedding=type_embedding, + ) + g1 = xp.concat([g1, g1_three_body], axis=-1) + # linear to change shape + g1 = self.g1_shape_tranform(g1) + if self.add_tebd_to_repinit_out: + assert self.tebd_transform is not None + g1 = g1 + self.tebd_transform(g1_inp) + # mapping g1 + assert mapping is not None + mapping_ext = xp.tile( + xp.reshape(mapping, (nframes, nall, 1)), (1, 1, g1.shape[-1]) + ) + g1_ext = xp_take_along_axis(g1, mapping_ext, axis=1) + # repformer + g1, g2, h2, rot_mat, sw = self.repformers( + nlist_dict[ + get_multiple_nlist_key( + self.repformers.get_rcut(), self.repformers.get_nsel() + ) + ], + coord_ext, + atype_ext, + g1_ext, + mapping, + ) +
if self.concat_output_tebd: + g1 = xp.concat([g1, g1_inp], axis=-1) + return g1, rot_mat, g2, h2, sw + + def serialize(self) -> dict: + repinit = self.repinit + repformers = self.repformers + repinit_three_body = self.repinit_three_body + data = { + "@class": "Descriptor", + "type": "dpa2", + "@version": 3, + "ntypes": self.ntypes, + "repinit_args": self.repinit_args.serialize(), + "repformer_args": self.repformer_args.serialize(), + "concat_output_tebd": self.concat_output_tebd, + "precision": self.precision, + "smooth": self.smooth, + "exclude_types": self.exclude_types, + "env_protection": self.env_protection, + "trainable": self.trainable, + "add_tebd_to_repinit_out": self.add_tebd_to_repinit_out, + "use_econf_tebd": self.use_econf_tebd, + "use_tebd_bias": self.use_tebd_bias, + "type_map": self.type_map, + "type_embedding": self.type_embedding.serialize(), + "g1_shape_tranform": self.g1_shape_tranform.serialize(), + } + if self.add_tebd_to_repinit_out: + data.update( + { + "tebd_transform": self.tebd_transform.serialize(), + } + ) + repinit_variable = { + "embeddings": repinit.embeddings.serialize(), + "env_mat": EnvMat(repinit.rcut, repinit.rcut_smth).serialize(), + "@variables": { + "davg": to_numpy_array(repinit["davg"]), + "dstd": to_numpy_array(repinit["dstd"]), + }, + } + if repinit.tebd_input_mode in ["strip"]: + repinit_variable.update( + {"embeddings_strip": repinit.embeddings_strip.serialize()} + ) + repformers_variable = { + "g2_embd": repformers.g2_embd.serialize(), + "repformer_layers": [layer.serialize() for layer in repformers.layers], + "env_mat": EnvMat(repformers.rcut, repformers.rcut_smth).serialize(), + "@variables": { + "davg": to_numpy_array(repformers["davg"]), + "dstd": to_numpy_array(repformers["dstd"]), + }, + } + data.update( + { + "repinit_variable": repinit_variable, + "repformers_variable": repformers_variable, + } + ) + if self.use_three_body: + repinit_three_body_variable = { + "embeddings": 
repinit_three_body.embeddings.serialize(), + "env_mat": EnvMat( + repinit_three_body.rcut, repinit_three_body.rcut_smth + ).serialize(), + "@variables": { + "davg": to_numpy_array(repinit_three_body["davg"]), + "dstd": to_numpy_array(repinit_three_body["dstd"]), + }, + } + if repinit_three_body.tebd_input_mode in ["strip"]: + repinit_three_body_variable.update( + { + "embeddings_strip": repinit_three_body.embeddings_strip.serialize() + } + ) + data.update( + { + "repinit_three_body_variable": repinit_three_body_variable, + } + ) + return data + + @classmethod + def deserialize(cls, data: dict) -> "DescrptDPA2": + data = data.copy() + version = data.pop("@version") + check_version_compatibility(version, 3, 1) + data.pop("@class") + data.pop("type") + repinit_variable = data.pop("repinit_variable").copy() + repformers_variable = data.pop("repformers_variable").copy() + repinit_three_body_variable = ( + data.pop("repinit_three_body_variable").copy() + if "repinit_three_body_variable" in data + else None + ) + type_embedding = data.pop("type_embedding") + g1_shape_tranform = data.pop("g1_shape_tranform") + tebd_transform = data.pop("tebd_transform", None) + add_tebd_to_repinit_out = data["add_tebd_to_repinit_out"] + if version < 3: + # compat with old version + data["repformer_args"]["use_sqrt_nnei"] = False + data["repformer_args"]["g1_out_conv"] = False + data["repformer_args"]["g1_out_mlp"] = False + data["repinit"] = RepinitArgs(**data.pop("repinit_args")) + data["repformer"] = RepformerArgs(**data.pop("repformer_args")) + # compat with version 1 + if "use_tebd_bias" not in data: + data["use_tebd_bias"] = True + obj = cls(**data) + obj.type_embedding = TypeEmbedNet.deserialize(type_embedding) + if add_tebd_to_repinit_out: + assert isinstance(tebd_transform, dict) + obj.tebd_transform = NativeLayer.deserialize(tebd_transform) + if obj.repinit_out_dim != obj.repformers.dim_in: # same criterion as __init__: the width includes the three-body output + obj.g1_shape_tranform = NativeLayer.deserialize(g1_shape_tranform) + + # deserialize
repinit + statistic_repinit = repinit_variable.pop("@variables") + env_mat = repinit_variable.pop("env_mat") + tebd_input_mode = data["repinit"].tebd_input_mode + obj.repinit.embeddings = NetworkCollection.deserialize( + repinit_variable.pop("embeddings") + ) + if tebd_input_mode in ["strip"]: + obj.repinit.embeddings_strip = NetworkCollection.deserialize( + repinit_variable.pop("embeddings_strip") + ) + obj.repinit["davg"] = statistic_repinit["davg"] + obj.repinit["dstd"] = statistic_repinit["dstd"] + + if data["repinit"].use_three_body: + # deserialize repinit_three_body + statistic_repinit_three_body = repinit_three_body_variable.pop("@variables") + env_mat = repinit_three_body_variable.pop("env_mat") + tebd_input_mode = data["repinit"].tebd_input_mode + obj.repinit_three_body.embeddings = NetworkCollection.deserialize( + repinit_three_body_variable.pop("embeddings") + ) + if tebd_input_mode in ["strip"]: + obj.repinit_three_body.embeddings_strip = NetworkCollection.deserialize( + repinit_three_body_variable.pop("embeddings_strip") + ) + obj.repinit_three_body["davg"] = statistic_repinit_three_body["davg"] + obj.repinit_three_body["dstd"] = statistic_repinit_three_body["dstd"] + + # deserialize repformers + statistic_repformers = repformers_variable.pop("@variables") + env_mat = repformers_variable.pop("env_mat") + repformer_layers = repformers_variable.pop("repformer_layers") + obj.repformers.g2_embd = NativeLayer.deserialize( + repformers_variable.pop("g2_embd") + ) + obj.repformers["davg"] = statistic_repformers["davg"] + obj.repformers["dstd"] = statistic_repformers["dstd"] + obj.repformers.layers = [ + RepformerLayer.deserialize(layer) for layer in repformer_layers + ] + return obj + + @classmethod + def update_sel( + cls, + train_data: DeepmdDataSystem, + type_map: Optional[list[str]], + local_jdata: dict, + ) -> tuple[dict, Optional[float]]: + """Update the selection and perform neighbor statistics. 
+ + Parameters + ---------- + train_data : DeepmdDataSystem + data used to do neighbor statistics + type_map : list[str], optional + The name of each type of atoms + local_jdata : dict + The local data refer to the current class + + Returns + ------- + dict + The updated local data + float + The minimum distance between two atoms + """ + local_jdata_cpy = local_jdata.copy() + update_sel = UpdateSel() + min_nbor_dist, repinit_sel = update_sel.update_one_sel( + train_data, + type_map, + local_jdata_cpy["repinit"]["rcut"], + local_jdata_cpy["repinit"]["nsel"], + True, + ) + local_jdata_cpy["repinit"]["nsel"] = repinit_sel[0] + min_nbor_dist, repinit_three_body_sel = update_sel.update_one_sel( + train_data, + type_map, + local_jdata_cpy["repinit"]["three_body_rcut"], + local_jdata_cpy["repinit"]["three_body_sel"], + True, + ) + local_jdata_cpy["repinit"]["three_body_sel"] = repinit_three_body_sel[0] + min_nbor_dist, repformer_sel = update_sel.update_one_sel( + train_data, + type_map, + local_jdata_cpy["repformer"]["rcut"], + local_jdata_cpy["repformer"]["nsel"], + True, + ) + local_jdata_cpy["repformer"]["nsel"] = repformer_sel[0] + return local_jdata_cpy, min_nbor_dist diff --git a/deepmd/dpmodel/descriptor/hybrid.py b/deepmd/dpmodel/descriptor/hybrid.py new file mode 100644 index 0000000000..5c1a7d2785 --- /dev/null +++ b/deepmd/dpmodel/descriptor/hybrid.py @@ -0,0 +1,376 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import math +from typing import ( + Any, + NoReturn, + Optional, + Union, +) + +import array_api_compat +import numpy as np + +from deepmd.dpmodel.common import ( + NativeOP, +) +from deepmd.dpmodel.descriptor.base_descriptor import ( + BaseDescriptor, +) +from deepmd.dpmodel.utils.nlist import ( + nlist_distinguish_types, +) +from deepmd.utils.data_system import ( + DeepmdDataSystem, +) +from deepmd.utils.path import ( + DPPath, +) +from deepmd.utils.version import ( + check_version_compatibility, +) + + +@BaseDescriptor.register("hybrid") +class 
DescrptHybrid(BaseDescriptor, NativeOP): + """Concatenate a list of descriptors to form a new descriptor. + + Parameters + ---------- + list : list[Union[BaseDescriptor, dict[str, Any]]] + Build a descriptor from the concatenation of the list of descriptors. + The descriptor can be either an object or a dictionary. + """ + + def __init__( + self, + list: list[Union[BaseDescriptor, dict[str, Any]]], + type_map: Optional[list[str]] = None, + ntypes: Optional[int] = None, # to be compat with input + ) -> None: + super().__init__() + # warning: list is conflict with built-in list + descrpt_list = list + if descrpt_list == [] or descrpt_list is None: + raise RuntimeError( + "cannot build descriptor from an empty list of descriptors." + ) + formatted_descript_list = [] + for ii in descrpt_list: + if isinstance(ii, BaseDescriptor): + formatted_descript_list.append(ii) + elif isinstance(ii, dict): + ii = ii.copy() + # only pass if not already set + ii.setdefault("type_map", type_map) + ii.setdefault("ntypes", ntypes) + formatted_descript_list.append(BaseDescriptor(**ii)) + else: + raise NotImplementedError + self.descrpt_list = formatted_descript_list + self.numb_descrpt = len(self.descrpt_list) + for ii in range(1, self.numb_descrpt): + assert ( + self.descrpt_list[ii].get_ntypes() == self.descrpt_list[0].get_ntypes() + ), f"number of atom types in {ii}th descriptor {self.descrpt_list[ii].__class__.__name__} does not match others" + # if hybrid sel is larger than sub sel, the nlist needs to be cut for each type + hybrid_sel = self.get_sel() + nlist_cut_idx: list[np.ndarray] = [] + if self.mixed_types() and not all( + descrpt.mixed_types() for descrpt in self.descrpt_list + ): + self.sel_no_mixed_types = np.max( + [ + descrpt.get_sel() + for descrpt in self.descrpt_list + if not descrpt.mixed_types() + ], + axis=0, + ).tolist() + else: + self.sel_no_mixed_types = None + for ii in range(self.numb_descrpt): + if self.mixed_types() == self.descrpt_list[ii].mixed_types(): +
hybrid_sel = self.get_sel() + else: + assert self.sel_no_mixed_types is not None + hybrid_sel = self.sel_no_mixed_types + sub_sel = self.descrpt_list[ii].get_sel() + start_idx = np.cumsum(np.pad(hybrid_sel, (1, 0), "constant"))[:-1] + end_idx = start_idx + np.array(sub_sel) + cut_idx = np.concatenate( + [range(ss, ee) for ss, ee in zip(start_idx, end_idx)] + ) + nlist_cut_idx.append(cut_idx) + self.nlist_cut_idx = nlist_cut_idx + + def get_rcut(self) -> float: + """Returns the cut-off radius.""" + return np.max([descrpt.get_rcut() for descrpt in self.descrpt_list]).item() + + def get_rcut_smth(self) -> float: + """Returns the radius where the neighbor information starts to smoothly decay to 0.""" + # may not be a good idea... + # Note: Using the minimum rcut_smth might not be appropriate in all scenarios. Consider using a different approach or provide detailed documentation on why the minimum value is chosen. + return np.min([descrpt.get_rcut_smth() for descrpt in self.descrpt_list]).item() + + def get_sel(self) -> list[int]: + """Returns the number of selected atoms for each type.""" + if self.mixed_types(): + return [ + np.max( + [descrpt.get_nsel() for descrpt in self.descrpt_list], axis=0 + ).item() + ] + else: + return np.max( + [descrpt.get_sel() for descrpt in self.descrpt_list], axis=0 + ).tolist() + + def get_ntypes(self) -> int: + """Returns the number of element types.""" + return self.descrpt_list[0].get_ntypes() + + def get_type_map(self) -> list[str]: + """Get the name to each type of atoms.""" + return self.descrpt_list[0].get_type_map() + + def get_dim_out(self) -> int: + """Returns the output dimension.""" + return np.sum([descrpt.get_dim_out() for descrpt in self.descrpt_list]).item() + + def get_dim_emb(self) -> int: + """Returns the embedding dimension.""" + return np.sum([descrpt.get_dim_emb() for descrpt in self.descrpt_list]).item() + + def mixed_types(self): + """Returns if the descriptor requires a neighbor list that distinguishes different +
atomic types or not. + """ + return any(descrpt.mixed_types() for descrpt in self.descrpt_list) + + def has_message_passing(self) -> bool: + """Returns whether the descriptor has message passing.""" + return any(descrpt.has_message_passing() for descrpt in self.descrpt_list) + + def need_sorted_nlist_for_lower(self) -> bool: + """Returns whether the descriptor needs sorted nlist when using `forward_lower`.""" + return True + + def get_env_protection(self) -> float: + """Returns the protection of building environment matrix. All descriptors should be the same.""" + all_protection = [descrpt.get_env_protection() for descrpt in self.descrpt_list] + same_as_0 = [math.isclose(ii, all_protection[0]) for ii in all_protection] + if not all(same_as_0): + raise ValueError( + "Hybrid descriptor requires the same environment matrix protection for all descriptors. Found differing values." + ) + return all_protection[0] + + def share_params(self, base_class, shared_level, resume=False) -> NoReturn: + """ + Share the parameters of self to the base_class with shared_level during multitask training. + If not start from checkpoint (resume is False), + some separated parameters (e.g. mean and stddev) will be re-calculated across different classes. + """ + raise NotImplementedError + + def change_type_map( + self, type_map: list[str], model_with_new_type_stat=None + ) -> None: + """Change the type related params to new ones, according to `type_map` and the original one in the model. + If there are new types in `type_map`, statistics will be updated accordingly to `model_with_new_type_stat` for these new types. 
+ """ + for ii, descrpt in enumerate(self.descrpt_list): + descrpt.change_type_map( + type_map=type_map, + model_with_new_type_stat=model_with_new_type_stat.descrpt_list[ii] + if model_with_new_type_stat is not None + else None, + ) + + def compute_input_stats( + self, merged: list[dict], path: Optional[DPPath] = None + ) -> None: + """Update mean and stddev for descriptor elements.""" + for descrpt in self.descrpt_list: + descrpt.compute_input_stats(merged, path) + + def set_stat_mean_and_stddev( + self, + mean: list[Union[np.ndarray, list[np.ndarray]]], + stddev: list[Union[np.ndarray, list[np.ndarray]]], + ) -> None: + """Update mean and stddev for descriptor.""" + for ii, descrpt in enumerate(self.descrpt_list): + descrpt.set_stat_mean_and_stddev(mean[ii], stddev[ii]) + + def get_stat_mean_and_stddev( + self, + ) -> tuple[ + list[Union[np.ndarray, list[np.ndarray]]], + list[Union[np.ndarray, list[np.ndarray]]], + ]: + """Get mean and stddev for descriptor.""" + mean_list = [] + stddev_list = [] + for ii, descrpt in enumerate(self.descrpt_list): + mean_item, stddev_item = descrpt.get_stat_mean_and_stddev() + mean_list.append(mean_item) + stddev_list.append(stddev_item) + return mean_list, stddev_list + + def enable_compression( + self, + min_nbor_dist: float, + table_extrapolate: float = 5, + table_stride_1: float = 0.01, + table_stride_2: float = 0.1, + check_frequency: int = -1, + ) -> None: + """Receive the statisitcs (distance, max_nbor_size and env_mat_range) of the training data. 
+ + Parameters + ---------- + min_nbor_dist + The nearest distance between atoms + table_extrapolate + The scale of model extrapolation + table_stride_1 + The uniform stride of the first table + table_stride_2 + The uniform stride of the second table + check_frequency + The overflow check frequency + """ + for descrpt in self.descrpt_list: + descrpt.enable_compression( + min_nbor_dist, + table_extrapolate, + table_stride_1, + table_stride_2, + check_frequency, + ) + + def call( + self, + coord_ext, + atype_ext, + nlist, + mapping: Optional[np.ndarray] = None, + ): + """Compute the descriptor. + + Parameters + ---------- + coord_ext + The extended coordinates of atoms. shape: nf x (nallx3) + atype_ext + The extended atom types. shape: nf x nall + nlist + The neighbor list. shape: nf x nloc x nnei + mapping + The index mapping, passed through to every sub-descriptor. + + Returns + ------- + descriptor + The descriptor. shape: nf x nloc x (ng x axis_neuron) + gr + The rotationally equivariant and permutationally invariant single particle + representation. shape: nf x nloc x ng x 3. + g2 + The rotationally invariant pair-particle representation. + h2 + The rotationally equivariant pair-particle representation. + sw + The smooth switch function.
+ """ + xp = array_api_compat.array_namespace(coord_ext, atype_ext, nlist) + out_descriptor = [] + out_gr = [] + out_g2 = None + out_h2 = None + out_sw = None + if self.sel_no_mixed_types is not None: + nl_distinguish_types = nlist_distinguish_types( + nlist, + atype_ext, + self.sel_no_mixed_types, + ) + else: + nl_distinguish_types = None + for descrpt, nci in zip(self.descrpt_list, self.nlist_cut_idx): + # cut the nlist to the correct length + if self.mixed_types() == descrpt.mixed_types(): + nl = xp.take(nlist, nci, axis=2) + else: + # mixed_types is True, but descrpt.mixed_types is False + assert nl_distinguish_types is not None + nl = nl_distinguish_types[:, :, nci] + odescriptor, gr, g2, h2, sw = descrpt(coord_ext, atype_ext, nl, mapping) + out_descriptor.append(odescriptor) + if gr is not None: + out_gr.append(gr) + + out_descriptor = xp.concat(out_descriptor, axis=-1) + out_gr = xp.concat(out_gr, axis=-2) if out_gr else None + return out_descriptor, out_gr, out_g2, out_h2, out_sw + + @classmethod + def update_sel( + cls, + train_data: DeepmdDataSystem, + type_map: Optional[list[str]], + local_jdata: dict, + ) -> tuple[dict, Optional[float]]: + """Update the selection and perform neighbor statistics. 
+ + Parameters + ---------- + train_data : DeepmdDataSystem + data used to do neighbor statistics + type_map : list[str], optional + The name of each type of atoms + local_jdata : dict + The local data refer to the current class + + Returns + ------- + dict + The updated local data + float + The minimum distance between two atoms + """ + local_jdata_cpy = local_jdata.copy() + new_list = [] + min_nbor_dist = None + for sub_jdata in local_jdata["list"]: + new_sub_jdata, min_nbor_dist_ = BaseDescriptor.update_sel( + train_data, type_map, sub_jdata + ) + if min_nbor_dist_ is not None: + min_nbor_dist = min_nbor_dist_ + new_list.append(new_sub_jdata) + local_jdata_cpy["list"] = new_list + return local_jdata_cpy, min_nbor_dist + + def serialize(self) -> dict: + return { + "@class": "Descriptor", + "type": "hybrid", + "@version": 1, + "list": [descrpt.serialize() for descrpt in self.descrpt_list], + } + + @classmethod + def deserialize(cls, data: dict) -> "DescrptHybrid": + data = data.copy() + class_name = data.pop("@class") + assert class_name == "Descriptor" + class_type = data.pop("type") + assert class_type == "hybrid" + check_version_compatibility(data.pop("@version"), 1, 1) + obj = cls( + list=[BaseDescriptor.deserialize(ii) for ii in data["list"]], + ) + return obj diff --git a/deepmd/dpmodel/descriptor/make_base_descriptor.py b/deepmd/dpmodel/descriptor/make_base_descriptor.py new file mode 100644 index 0000000000..f45e85e516 --- /dev/null +++ b/deepmd/dpmodel/descriptor/make_base_descriptor.py @@ -0,0 +1,243 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from abc import ( + ABC, + abstractmethod, +) +from typing import ( + Callable, + NoReturn, + Optional, + Union, +) + +from deepmd.common import ( + j_get_type, +) +from deepmd.utils.data_system import ( + DeepmdDataSystem, +) +from deepmd.utils.path import ( + DPPath, +) +from deepmd.utils.plugin import ( + PluginVariant, + make_plugin_registry, +) + + +def make_base_descriptor( + t_tensor, + 
fwd_method_name: str = "forward", +): + """Make the base class for the descriptor. + + Parameters + ---------- + t_tensor + The type of the tensor. used in the type hint. + fwd_method_name + Name of the forward method. For dpmodels, it should be "call". + For torch models, it should be "forward". + + """ + + class BD(ABC, PluginVariant, make_plugin_registry("descriptor")): + """Base descriptor provides the interfaces of descriptor.""" + + def __new__(cls, *args, **kwargs): + if cls is BD: + cls = cls.get_class_by_type(j_get_type(kwargs, cls.__name__)) + return super().__new__(cls) + + @abstractmethod + def get_rcut(self) -> float: + """Returns the cut-off radius.""" + pass + + @abstractmethod + def get_rcut_smth(self) -> float: + """Returns the radius where the neighbor information starts to smoothly decay to 0.""" + pass + + @abstractmethod + def get_sel(self) -> list[int]: + """Returns the number of selected neighboring atoms for each type.""" + pass + + def get_nsel(self) -> int: + """Returns the total number of selected neighboring atoms in the cut-off radius.""" + return sum(self.get_sel()) + + def get_nnei(self) -> int: + """Returns the total number of selected neighboring atoms in the cut-off radius.""" + return self.get_nsel() + + @abstractmethod + def get_ntypes(self) -> int: + """Returns the number of element types.""" + pass + + @abstractmethod + def get_type_map(self) -> list[str]: + """Get the name to each type of atoms.""" + pass + + @abstractmethod + def get_dim_out(self) -> int: + """Returns the output descriptor dimension.""" + pass + + @abstractmethod + def get_dim_emb(self) -> int: + """Returns the embedding dimension of g2.""" + pass + + @abstractmethod + def mixed_types(self) -> bool: + """Returns if the descriptor requires a neighbor list that distinguish different + atomic types or not. 
+ """ + pass + + @abstractmethod + def has_message_passing(self) -> bool: + """Returns whether the descriptor has message passing.""" + + @abstractmethod + def need_sorted_nlist_for_lower(self) -> bool: + """Returns whether the descriptor needs sorted nlist when using `forward_lower`.""" + + @abstractmethod + def get_env_protection(self) -> float: + """Returns the protection of building environment matrix.""" + pass + + @abstractmethod + def share_params(self, base_class, shared_level, resume=False): + """ + Share the parameters of self to the base_class with shared_level during multitask training. + If not start from checkpoint (resume is False), + some separated parameters (e.g. mean and stddev) will be re-calculated across different classes. + """ + pass + + @abstractmethod + def change_type_map( + self, type_map: list[str], model_with_new_type_stat=None + ) -> None: + """Change the type related params to new ones, according to `type_map` and the original one in the model. + If there are new types in `type_map`, statistics will be updated accordingly to `model_with_new_type_stat` for these new types. + """ + pass + + @abstractmethod + def set_stat_mean_and_stddev(self, mean, stddev) -> None: + """Update mean and stddev for descriptor.""" + pass + + @abstractmethod + def get_stat_mean_and_stddev(self): + """Get mean and stddev for descriptor.""" + pass + + def compute_input_stats( + self, + merged: Union[Callable[[], list[dict]], list[dict]], + path: Optional[DPPath] = None, + ) -> NoReturn: + """Update mean and stddev for descriptor elements.""" + raise NotImplementedError + + def enable_compression( + self, + min_nbor_dist: float, + table_extrapolate: float = 5, + table_stride_1: float = 0.01, + table_stride_2: float = 0.1, + check_frequency: int = -1, + ) -> None: + """Receive the statisitcs (distance, max_nbor_size and env_mat_range) of the training data. 
+ + Parameters + ---------- + min_nbor_dist + The nearest distance between atoms + table_extrapolate + The scale of model extrapolation + table_stride_1 + The uniform stride of the first table + table_stride_2 + The uniform stride of the second table + check_frequency + The overflow check frequency + """ + raise NotImplementedError("This descriptor doesn't support compression!") + + @abstractmethod + def fwd( + self, + extended_coord, + extended_atype, + nlist, + mapping: Optional[t_tensor] = None, + ): + """Calculate descriptor.""" + pass + + @abstractmethod + def serialize(self) -> dict: + """Serialize the obj to dict.""" + pass + + @classmethod + def deserialize(cls, data: dict) -> "BD": + """Deserialize the model. + + Parameters + ---------- + data : dict + The serialized data + + Returns + ------- + BD + The deserialized descriptor + """ + if cls is BD: + return BD.get_class_by_type(data["type"]).deserialize(data) + raise NotImplementedError(f"Not implemented in class {cls.__name__}") + + @classmethod + @abstractmethod + def update_sel( + cls, + train_data: DeepmdDataSystem, + type_map: Optional[list[str]], + local_jdata: dict, + ) -> tuple[dict, Optional[float]]: + """Update the selection and perform neighbor statistics. 
def xp_transpose_01423(x):
    """Permute the axes of a rank-5 array from (0, 1, 2, 3, 4) to (0, 1, 4, 2, 3).

    The permutation is expressed with ``reshape`` and ``matrix_transpose``
    from the array namespace of ``x`` only.

    Parameters
    ----------
    x
        Input array with five axes.

    Returns
    -------
    The array with its last axis moved in front of axes 2 and 3.
    """
    xp = array_api_compat.array_namespace(x)
    n0, n1, n2, n3, n4 = x.shape[:5]
    # fuse axes 2 and 3 so the permutation becomes a swap of the last two axes
    fused = xp.reshape(x, (n0, n1, n2 * n3, n4))
    swapped = xp.matrix_transpose(fused)
    # un-fuse: the former last axis now sits in position 2
    return xp.reshape(swapped, (n0, n1, n4, n2, n3))
@DescriptorBlock.register("se_repformer")
@DescriptorBlock.register("se_uni")
class DescrptBlockRepformers(NativeOP, DescriptorBlock):
    r"""
    The repformer descriptor block.

    Parameters
    ----------
    rcut : float
        The cut-off radius.
    rcut_smth : float
        Where to start smoothing. For example the 1/r term is smoothed from rcut to rcut_smth.
    sel : int or list[int]
        Maximally possible number of selected neighbors.
        Either a single integer or a list holding exactly one integer.
    ntypes : int
        Number of element types
    nlayers : int, optional
        Number of repformer layers.
    g1_dim : int, optional
        Dimension of the first graph convolution layer.
    g2_dim : int, optional
        Dimension of the second graph convolution layer.
    axis_neuron : int, optional
        Size of the submatrix of G (embedding matrix).
    direct_dist : bool, optional
        Whether to use direct distance information (1/r term) in the repformer block.
    update_g1_has_conv : bool, optional
        Whether to update the g1 rep with convolution term.
    update_g1_has_drrd : bool, optional
        Whether to update the g1 rep with the drrd term.
    update_g1_has_grrg : bool, optional
        Whether to update the g1 rep with the grrg term.
    update_g1_has_attn : bool, optional
        Whether to update the g1 rep with the localized self-attention.
    update_g2_has_g1g1 : bool, optional
        Whether to update the g2 rep with the g1xg1 term.
    update_g2_has_attn : bool, optional
        Whether to update the g2 rep with the gated self-attention.
    update_h2 : bool, optional
        Whether to update the h2 rep.
    attn1_hidden : int, optional
        The hidden dimension of localized self-attention to update the g1 rep.
    attn1_nhead : int, optional
        The number of heads in localized self-attention to update the g1 rep.
    attn2_hidden : int, optional
        The hidden dimension of gated self-attention to update the g2 rep.
    attn2_nhead : int, optional
        The number of heads in gated self-attention to update the g2 rep.
    attn2_has_gate : bool, optional
        Whether to use gate in the gated self-attention to update the g2 rep.
    activation_function : str, optional
        The activation function in the embedding net.
    update_style : str, optional
        Style to update a representation.
        Supported options are:
        -'res_avg': Updates a rep `u` with: u = 1/\sqrt{n+1} (u + u_1 + u_2 + ... + u_n)
        -'res_incr': Updates a rep `u` with: u = u + 1/\sqrt{n} (u_1 + u_2 + ... + u_n)
        -'res_residual': Updates a rep `u` with: u = u + (r_1*u_1 + r_2*u_2 + ... + r_n*u_n)
        where `r_1`, `r_2` ... `r_n` are residual weights defined by `update_residual`
        and `update_residual_init`.
    update_residual : float, optional
        When update using residual mode, the initial std of residual vector weights.
    update_residual_init : str, optional
        When update using residual mode, the initialization mode of residual vector weights.
    set_davg_zero : bool, optional
        Set the normalization average to zero.
    precision : str, optional
        The precision of the embedding net parameters.
    smooth : bool, optional
        Whether to use smoothness in processes such as attention weights calculation.
    exclude_types : list[list[int]], optional
        The excluded pairs of types which have no interaction with each other.
        For example, `[[0, 1]]` means no interaction between type 0 and type 1.
    env_protection : float, optional
        Protection parameter to prevent division by zero errors during environment matrix calculations.
        For example, when using paddings, there may be zero distances of neighbors, which may make division by zero error during environment matrix calculations without protection.
    trainable_ln : bool, optional
        Whether to use trainable shift and scale weights in layer normalization.
    use_sqrt_nnei : bool, optional
        Whether to use the square root of the number of neighbors for symmetrization_op normalization instead of using the number of neighbors directly.
    g1_out_conv : bool, optional
        Whether to put the convolutional update of g1 separately outside the concatenated MLP update.
    g1_out_mlp : bool, optional
        Whether to put the self MLP update of g1 separately outside the concatenated MLP update.
    ln_eps : float, optional
        The epsilon value for layer normalization.
    seed : int, optional
        The random seed for initialization.
    """

    def __init__(
        self,
        rcut: float,
        rcut_smth: float,
        sel: Union[list[int], int],
        ntypes: int,
        nlayers: int = 3,
        g1_dim: int = 128,
        g2_dim: int = 16,
        axis_neuron: int = 4,
        direct_dist: bool = False,
        update_g1_has_conv: bool = True,
        update_g1_has_drrd: bool = True,
        update_g1_has_grrg: bool = True,
        update_g1_has_attn: bool = True,
        update_g2_has_g1g1: bool = True,
        update_g2_has_attn: bool = True,
        update_h2: bool = False,
        attn1_hidden: int = 64,
        attn1_nhead: int = 4,
        attn2_hidden: int = 16,
        attn2_nhead: int = 4,
        attn2_has_gate: bool = False,
        activation_function: str = "tanh",
        update_style: str = "res_avg",
        update_residual: float = 0.001,
        update_residual_init: str = "norm",
        set_davg_zero: bool = True,
        smooth: bool = True,
        exclude_types: list[tuple[int, int]] = [],
        env_protection: float = 0.0,
        precision: str = "float64",
        trainable_ln: bool = True,
        use_sqrt_nnei: bool = True,
        g1_out_conv: bool = True,
        g1_out_mlp: bool = True,
        ln_eps: Optional[float] = 1e-5,
        seed: Optional[Union[int, list[int]]] = None,
    ) -> None:
        super().__init__()
        self.rcut = rcut
        self.rcut_smth = rcut_smth
        self.ntypes = ntypes
        self.nlayers = nlayers
        # normalize `sel` to a private one-element list (never alias the caller's list)
        sel = [sel] if isinstance(sel, int) else list(sel)
        self.nnei = sum(sel)
        self.ndescrpt = self.nnei * 4  # use full descriptor.
        assert len(sel) == 1
        self.sel = sel
        self.sec = self.sel
        self.split_sel = self.sel
        self.axis_neuron = axis_neuron
        self.set_davg_zero = set_davg_zero
        self.g1_dim = g1_dim
        self.g2_dim = g2_dim
        self.update_g1_has_conv = update_g1_has_conv
        self.update_g1_has_drrd = update_g1_has_drrd
        self.update_g1_has_grrg = update_g1_has_grrg
        self.update_g1_has_attn = update_g1_has_attn
        self.update_g2_has_g1g1 = update_g2_has_g1g1
        self.update_g2_has_attn = update_g2_has_attn
        self.update_h2 = update_h2
        self.attn1_hidden = attn1_hidden
        self.attn1_nhead = attn1_nhead
        self.attn2_has_gate = attn2_has_gate
        self.attn2_hidden = attn2_hidden
        self.attn2_nhead = attn2_nhead
        self.activation_function = activation_function
        self.update_style = update_style
        self.update_residual = update_residual
        self.update_residual_init = update_residual_init
        self.direct_dist = direct_dist
        self.act = get_activation_fn(self.activation_function)
        self.smooth = smooth
        # order matters, placed after the assignment of self.ntypes
        self.reinit_exclude(exclude_types)
        self.env_protection = env_protection
        self.precision = precision
        self.trainable_ln = trainable_ln
        self.use_sqrt_nnei = use_sqrt_nnei
        self.g1_out_conv = g1_out_conv
        self.g1_out_mlp = g1_out_mlp
        self.ln_eps = ln_eps
        # protection added to the neighbor count when normalizing in `_cal_hg`
        self.epsilon = 1e-4

        self.g2_embd = NativeLayer(
            1, self.g2_dim, precision=precision, seed=child_seed(seed, 0)
        )
        # every layer gets its own child seed derived from (seed, 1, layer index)
        self.layers = [
            RepformerLayer(
                self.rcut,
                self.rcut_smth,
                self.sel,
                self.ntypes,
                self.g1_dim,
                self.g2_dim,
                axis_neuron=self.axis_neuron,
                # the last layer is built with update_chnnl_2=False
                update_chnnl_2=(ii != nlayers - 1),
                update_g1_has_conv=self.update_g1_has_conv,
                update_g1_has_drrd=self.update_g1_has_drrd,
                update_g1_has_grrg=self.update_g1_has_grrg,
                update_g1_has_attn=self.update_g1_has_attn,
                update_g2_has_g1g1=self.update_g2_has_g1g1,
                update_g2_has_attn=self.update_g2_has_attn,
                update_h2=self.update_h2,
                attn1_hidden=self.attn1_hidden,
                attn1_nhead=self.attn1_nhead,
                attn2_has_gate=self.attn2_has_gate,
                attn2_hidden=self.attn2_hidden,
                attn2_nhead=self.attn2_nhead,
                activation_function=self.activation_function,
                update_style=self.update_style,
                update_residual=self.update_residual,
                update_residual_init=self.update_residual_init,
                smooth=self.smooth,
                trainable_ln=self.trainable_ln,
                ln_eps=self.ln_eps,
                precision=precision,
                use_sqrt_nnei=self.use_sqrt_nnei,
                g1_out_conv=self.g1_out_conv,
                g1_out_mlp=self.g1_out_mlp,
                seed=child_seed(child_seed(seed, 1), ii),
            )
            for ii in range(nlayers)
        ]

        wanted_shape = (self.ntypes, self.nnei, 4)
        self.env_mat = EnvMat(self.rcut, self.rcut_smth, protection=self.env_protection)
        self.mean = np.zeros(wanted_shape, dtype=PRECISION_DICT[self.precision])
        self.stddev = np.ones(wanted_shape, dtype=PRECISION_DICT[self.precision])
        self.orig_sel = self.sel

    def get_rcut(self) -> float:
        """Returns the cut-off radius."""
        return self.rcut

    def get_rcut_smth(self) -> float:
        """Returns the radius where the neighbor information starts to smoothly decay to 0."""
        return self.rcut_smth

    def get_nsel(self) -> int:
        """Returns the number of selected atoms in the cut-off radius."""
        return sum(self.sel)

    def get_sel(self) -> list[int]:
        """Returns the number of selected atoms for each type."""
        return self.sel

    def get_ntypes(self) -> int:
        """Returns the number of element types."""
        return self.ntypes

    def get_dim_in(self) -> int:
        """Returns the input dimension."""
        return self.dim_in

    def get_dim_out(self) -> int:
        """Returns the output dimension."""
        return self.dim_out

    def get_dim_emb(self) -> int:
        """Returns the embedding dimension g2."""
        return self.g2_dim

    def get_env_protection(self) -> float:
        """Returns the protection of building environment matrix."""
        return self.env_protection

    def __setitem__(self, key, value) -> None:
        """Set the statistics: mean for "avg"/"data_avg"/"davg", stddev for "std"/"data_std"/"dstd"."""
        if key in ("avg", "data_avg", "davg"):
            self.mean = value
        elif key in ("std", "data_std", "dstd"):
            self.stddev = value
        else:
            raise KeyError(key)

    def __getitem__(self, key):
        """Get the statistics: mean for "avg"/"data_avg"/"davg", stddev for "std"/"data_std"/"dstd"."""
        if key in ("avg", "data_avg", "davg"):
            return self.mean
        elif key in ("std", "data_std", "dstd"):
            return self.stddev
        else:
            raise KeyError(key)

    def mixed_types(self) -> bool:
        """If true, the descriptor
        1. assumes total number of atoms aligned across frames;
        2. requires a neighbor list that does not distinguish different atomic types.

        If false, the descriptor
        1. assumes total number of atoms of each atom type aligned across frames;
        2. requires a neighbor list that distinguishes different atomic types.

        """
        return True

    @property
    def dim_out(self):
        """Returns the output dimension of this descriptor."""
        return self.g1_dim

    @property
    def dim_in(self):
        """Returns the atomic input dimension of this descriptor."""
        return self.g1_dim

    @property
    def dim_emb(self):
        """Returns the embedding dimension g2."""
        return self.get_dim_emb()

    def compute_input_stats(
        self,
        merged: Union[Callable[[], list[dict]], list[dict]],
        path: Optional[DPPath] = None,
    ) -> NoReturn:
        """Compute the input statistics (e.g. mean and stddev) for the descriptors from packed data."""
        raise NotImplementedError

    def get_stats(self) -> NoReturn:
        """Get the statistics of the descriptor."""
        raise NotImplementedError

    def reinit_exclude(
        self,
        exclude_types: list[tuple[int, int]] = [],
    ) -> None:
        """Replace the excluded type pairs and rebuild the pair-exclusion mask.

        Parameters
        ----------
        exclude_types : list[tuple[int, int]], optional
            The excluded pairs of types. The list is copied, so neither the
            caller's list nor the shared default object is stored on the instance.
        """
        # copy: the default `[]` is one object shared by every call, and
        # `serialize()` hands `self.exclude_types` back to callers
        self.exclude_types = list(exclude_types)
        self.emask = PairExcludeMask(self.ntypes, exclude_types=self.exclude_types)

    def call(
        self,
        nlist: np.ndarray,
        coord_ext: np.ndarray,
        atype_ext: np.ndarray,
        atype_embd_ext: Optional[np.ndarray] = None,
        mapping: Optional[np.ndarray] = None,
        type_embedding: Optional[np.ndarray] = None,
    ):
        """Compute the repformer representations.

        Parameters
        ----------
        nlist
            Neighbor list, nf x nloc x nnei; -1 marks a padded entry.
        coord_ext
            Extended coordinates, passed to the environment matrix.
        atype_ext
            Extended atom types.
        atype_embd_ext
            Extended atomic type embedding. Required; its first nloc atoms must
            have shape nf x nloc x g1_dim.
        mapping
            Index mapping used to gather g1 for the extended atoms. Required;
            it is reshaped to nf x nall x 1.
        type_embedding
            Not used by this implementation.

        Returns
        -------
        g1
            Atomic invariant rep, nf x nloc x ng1.
        g2
            Pair-wise invariant rep, nf x nloc x nnei x ng2.
        h2
            Pair-wise equivariant rep, nf x nloc x nnei x 3.
        rot_mat
            Transposed h2g2 matrix, nf x nloc x ng2 x 3.
        sw
            The switch function, nf x nloc x nnei, zeroed at padded neighbors.

        Raises
        ------
        ValueError
            If `atype_embd_ext` or `mapping` is None.
        """
        # both arguments default to None for interface reasons but are required here
        if atype_embd_ext is None:
            raise ValueError("atype_embd_ext must be provided to the repformer block")
        if mapping is None:
            raise ValueError("mapping must be provided to the repformer block")
        xp = array_api_compat.array_namespace(nlist, coord_ext, atype_ext)
        exclude_mask = self.emask.build_type_exclude_mask(nlist, atype_ext)
        nlist = xp.where(exclude_mask, nlist, xp.full_like(nlist, -1))
        # nf x nloc x nnei x 4
        dmatrix, diff, sw = self.env_mat.call(
            coord_ext, atype_ext, nlist, self.mean, self.stddev
        )
        nf, nloc, nnei, _ = dmatrix.shape
        # nf x nloc x nnei
        nlist_mask = nlist != -1
        # nf x nloc x nnei
        sw = xp.reshape(sw, (nf, nloc, nnei))
        sw = xp.where(nlist_mask, sw, xp.zeros_like(sw))
        # nf x nloc x tebd_dim
        atype_embd = atype_embd_ext[:, :nloc, :]
        assert list(atype_embd.shape) == [nf, nloc, self.g1_dim]

        g1 = self.act(atype_embd)
        # nf x nloc x nnei x 1,  nf x nloc x nnei x 3
        if not self.direct_dist:
            g2, h2 = xp.split(dmatrix, [1], axis=-1)
        else:
            g2, h2 = xp.linalg.vector_norm(diff, axis=-1, keepdims=True), diff
            g2 = g2 / self.rcut
            h2 = h2 / self.rcut
        # nf x nloc x nnei x ng2
        g2 = self.act(self.g2_embd(g2))
        # set all padding positions to index of 0
        # if a neighbor is real or not is indicated by nlist_mask
        nlist = xp.where(nlist == -1, xp.zeros_like(nlist), nlist)
        # nf x nall x ng1
        mapping = xp.tile(xp.reshape(mapping, (nf, -1, 1)), (1, 1, self.g1_dim))
        for ll in self.layers:
            # g1:     nf x nloc x ng1
            # g1_ext: nf x nall x ng1
            g1_ext = xp_take_along_axis(g1, mapping, axis=1)
            g1, g2, h2 = ll.call(
                g1_ext,
                g2,
                h2,
                nlist,
                nlist_mask,
                sw,
            )

        # nf x nloc x 3 x ng2
        h2g2 = _cal_hg(
            g2,
            h2,
            nlist_mask,
            sw,
            smooth=self.smooth,
            epsilon=self.epsilon,
            use_sqrt_nnei=self.use_sqrt_nnei,
        )
        # (nf x nloc) x ng2 x 3
        rot_mat = xp.matrix_transpose(h2g2)
        return g1, g2, h2, xp.reshape(rot_mat, (nf, nloc, self.dim_emb, 3)), sw

    def has_message_passing(self) -> bool:
        """Returns whether the descriptor block has message passing."""
        return True

    def need_sorted_nlist_for_lower(self) -> bool:
        """Returns whether the descriptor block needs sorted nlist when using `forward_lower`."""
        return False

    @classmethod
    def deserialize(cls, data):
        """Deserialize the descriptor block.

        Parameters
        ----------
        data : dict
            The dict produced by `serialize`. It is shallow-copied, not modified.

        Returns
        -------
        DescrptBlockRepformers
            The block with its networks, environment matrix and statistics restored.
        """
        data = data.copy()
        g2_embd = NativeLayer.deserialize(data.pop("g2_embd"))
        layers = [RepformerLayer.deserialize(dd) for dd in data.pop("repformer_layers")]
        env_mat = EnvMat.deserialize(data.pop("env_mat"))
        variables = data.pop("@variables")
        davg = variables["davg"]
        dstd = variables["dstd"]
        # the remaining keys are exactly the constructor arguments
        obj = cls(**data)
        obj.g2_embd = g2_embd
        obj.layers = layers
        obj.env_mat = env_mat
        obj.mean = davg
        obj.stddev = dstd
        return obj

    def serialize(self):
        """Serialize the descriptor block.

        Returns
        -------
        dict
            The constructor arguments together with the serialized networks
            ("g2_embd", "repformer_layers", "env_mat") and the statistics
            under "@variables".
        """
        return {
            "rcut": self.rcut,
            "rcut_smth": self.rcut_smth,
            "sel": self.sel,
            "ntypes": self.ntypes,
            "nlayers": self.nlayers,
            "g1_dim": self.g1_dim,
            "g2_dim": self.g2_dim,
            "axis_neuron": self.axis_neuron,
            "direct_dist": self.direct_dist,
            "update_g1_has_conv": self.update_g1_has_conv,
            "update_g1_has_drrd": self.update_g1_has_drrd,
            "update_g1_has_grrg": self.update_g1_has_grrg,
            "update_g1_has_attn": self.update_g1_has_attn,
            "update_g2_has_g1g1": self.update_g2_has_g1g1,
            "update_g2_has_attn": self.update_g2_has_attn,
            "update_h2": self.update_h2,
            "attn1_hidden": self.attn1_hidden,
            "attn1_nhead": self.attn1_nhead,
            "attn2_hidden": self.attn2_hidden,
            "attn2_nhead": self.attn2_nhead,
            "attn2_has_gate": self.attn2_has_gate,
            "activation_function": self.activation_function,
            "update_style": self.update_style,
            "update_residual": self.update_residual,
            "update_residual_init": self.update_residual_init,
            "set_davg_zero": self.set_davg_zero,
            "smooth": self.smooth,
            "exclude_types": self.exclude_types,
            "env_protection": self.env_protection,
            "precision": self.precision,
            "trainable_ln": self.trainable_ln,
            "use_sqrt_nnei": self.use_sqrt_nnei,
            "g1_out_conv": self.g1_out_conv,
            "g1_out_mlp": self.g1_out_mlp,
            "ln_eps": self.ln_eps,
            # variables
            "g2_embd": self.g2_embd.serialize(),
            "repformer_layers": [layer.serialize() for layer in self.layers],
            "env_mat": self.env_mat.serialize(),
            "@variables": {
                "davg": to_numpy_array(self["davg"]),
                "dstd": to_numpy_array(self["dstd"]),
            },
        }
def _make_nei_g1(
    g1_ext: np.ndarray,
    nlist: np.ndarray,
) -> np.ndarray:
    """
    Gather the atomic invariant rep of every neighbor listed in `nlist`.

    Parameters
    ----------
    g1_ext
        Extended atomic invariant rep, with shape [nf, nall, ng1].
    nlist
        Neighbor list, with shape [nf, nloc, nnei].

    Returns
    -------
    gg1: np.ndarray
        Neighbor-wise atomic invariant rep, with shape [nf, nloc, nnei, ng1].
    """
    xp = array_api_compat.array_namespace(g1_ext, nlist)
    n_frames, n_loc, n_nei = nlist.shape
    n_feat = g1_ext.shape[-1]
    # flatten the (nloc, nnei) axes: nf x (nloc x nnei) x 1
    flat_nlist = xp.reshape(nlist, (n_frames, n_loc * n_nei, 1))
    # repeat each neighbor index over the feature axis: nf x (nloc x nnei) x ng1
    gather_index = xp.tile(flat_nlist, (1, 1, n_feat))
    # pick the rows of g1_ext along the atom axis
    gathered = xp_take_along_axis(g1_ext, gather_index, axis=1)
    # restore the neighbor axis: nf x nloc x nnei x ng1
    return xp.reshape(gathered, (n_frames, n_loc, n_nei, n_feat))
+ + Parameters + ---------- + gg + Neighbor-wise rep tensors, with shape [nf, nloc, nnei, d]. + nlist_mask + Neighbor list mask, where zero means no neighbor, with shape [nf, nloc, nnei]. + """ + xp = array_api_compat.array_namespace(gg, nlist_mask) + masked_gg = xp.where(nlist_mask[:, :, :, None], gg, xp.zeros_like(gg)) + return masked_gg + + +def _apply_switch(gg: np.ndarray, sw: np.ndarray) -> np.ndarray: + """ + Apply switch function to neighbor-wise rep tensors. + + Parameters + ---------- + gg + Neighbor-wise rep tensors, with shape [nf, nloc, nnei, d]. + sw + The switch function, which equals 1 within the rcut_smth range, smoothly decays from 1 to 0 between rcut_smth and rcut, + and remains 0 beyond rcut, with shape [nf, nloc, nnei]. + """ + # gg: nf x nloc x nnei x d + # sw: nf x nloc x nnei + return gg * sw[:, :, :, None] + + +def _cal_hg( + g: np.ndarray, + h: np.ndarray, + nlist_mask: np.ndarray, + sw: np.ndarray, + smooth: bool = True, + epsilon: float = 1e-4, + use_sqrt_nnei: bool = True, +) -> np.ndarray: + """ + Calculate the transposed rotation matrix. + + Parameters + ---------- + g + Neighbor-wise/Pair-wise invariant rep tensors, with shape [nf, nloc, nnei, ng]. + h + Neighbor-wise/Pair-wise equivariant rep tensors, with shape [nf, nloc, nnei, 3]. + nlist_mask + Neighbor list mask, where zero means no neighbor, with shape [nf, nloc, nnei]. + sw + The switch function, which equals 1 within the rcut_smth range, smoothly decays from 1 to 0 between rcut_smth and rcut, + and remains 0 beyond rcut, with shape [nf, nloc, nnei]. + smooth + Whether to use smoothness in processes such as attention weights calculation. + epsilon + Protection of 1./nnei. + use_sqrt_nnei : bool, optional + Whether to use the square root of the number of neighbors for symmetrization_op normalization instead of using the number of neighbors directly. + + Returns + ------- + hg + The transposed rotation matrix, with shape [nf, nloc, 3, ng]. 
def _cal_grrg(hg: np.ndarray, axis_neuron: int) -> np.ndarray:
    """
    Build the atomic invariant rep from the transposed rotation matrix.

    Parameters
    ----------
    hg
        The transposed rotation matrix, with shape [nf, nloc, 3, ng].
    axis_neuron
        Size of the submatrix.

    Returns
    -------
    grrg
        Atomic invariant rep, with shape [nf, nloc, (axis_neuron * ng)].
    """
    xp = array_api_compat.array_namespace(hg)
    n_frames, n_loc, _, n_feat = hg.shape
    # first `axis_neuron` columns of hg: nf x nloc x 3 x axis_neuron
    leading_cols = hg[..., :axis_neuron]
    # contract the size-3 axis: nf x nloc x axis_neuron x ng
    product = xp.matmul(xp.matrix_transpose(leading_cols), hg)
    product = product / 3.0
    # flatten the last two axes: nf x nloc x (axis_neuron * ng)
    return xp.reshape(product, (n_frames, n_loc, axis_neuron * n_feat))
class Atten2Map(NativeOP):
    """Neighbor-wise multi-head self-attention map, optionally gated by h2."""

    def __init__(
        self,
        input_dim: int,
        hidden_dim: int,
        head_num: int,
        has_gate: bool = False,  # apply gate to attn map
        smooth: bool = True,
        attnw_shift: float = 20.0,
        precision: str = "float64",
        seed: Optional[Union[int, list[int]]] = None,
    ) -> None:
        """Return neighbor-wise multi-head self-attention maps, with gate mechanism."""
        super().__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.head_num = head_num
        self.has_gate = has_gate
        self.smooth = smooth
        self.attnw_shift = attnw_shift
        self.precision = precision
        # a single bias-free layer produces queries and keys of all heads at once
        self.mapqk = NativeLayer(
            input_dim,
            hidden_dim * 2 * head_num,
            bias=False,
            precision=precision,
            seed=seed,
        )

    def call(
        self,
        g2: np.ndarray,  # nf x nloc x nnei x ng2
        h2: np.ndarray,  # nf x nloc x nnei x 3
        nlist_mask: np.ndarray,  # nf x nloc x nnei
        sw: np.ndarray,  # nf x nloc x nnei
    ) -> np.ndarray:
        """Compute the attention map.

        Parameters
        ----------
        g2
            Pair-wise invariant rep, nf x nloc x nnei x ng2.
        h2
            Pair-wise equivariant rep, nf x nloc x nnei x 3.
        nlist_mask
            Neighbor list mask, nf x nloc x nnei.
        sw
            The switch function, nf x nloc x nnei.

        Returns
        -------
        The attention map, nf x nloc x nnei x nnei x nh.
        """
        xp = array_api_compat.array_namespace(g2, h2, nlist_mask, sw)
        nf, nloc, nnei, _ = g2.shape
        nd = self.hidden_dim
        nh = self.head_num
        # nf x nloc x nnei x nd x (nh x 2)
        qk = xp.reshape(self.mapqk(g2), (nf, nloc, nnei, nd, nh * 2))
        # nf x nloc x (nh x 2) x nnei x nd
        qk = xp_transpose_01423(qk)
        # first nh slices are the queries, the rest the keys; each nf x nloc x nh x nnei x nd
        query = qk[:, :, :nh, :, :]
        key = qk[:, :, nh:, :, :]
        # nf x nloc x nh x nnei x nnei
        attnw = xp.matmul(query, xp.matrix_transpose(key)) / nd**0.5
        if self.has_gate:
            gate = xp.reshape(
                xp.matmul(h2, xp.matrix_transpose(h2)), (nf, nloc, 1, nnei, nnei)
            )
            attnw = attnw * gate
        # nf x nloc x 1 x nnei
        mask_with_head_axis = xp.expand_dims(nlist_mask, axis=2)
        # True where the last (column) neighbor is padding, nf x nloc x 1 x 1 x nnei
        pad_cols = ~xp.expand_dims(mask_with_head_axis, axis=2)
        # True where the row neighbor is padding, nf x nloc x 1 x nnei x 1
        pad_rows = ~xp.expand_dims(mask_with_head_axis, axis=-1)
        if self.smooth:
            sw_rows = sw[:, :, None, :, None]
            sw_cols = sw[:, :, None, None, :]
            # shift, damp by the switch of both neighbors, then shift back
            attnw = (attnw + self.attnw_shift) * sw_rows * sw_cols - self.attnw_shift
        else:
            attnw = xp.where(pad_cols, xp.full_like(attnw, -xp.inf), attnw)
        attnw = np_softmax(attnw, axis=-1)
        zeros = xp.zeros_like(attnw)
        attnw = xp.where(pad_cols, zeros, attnw)
        # nf x nloc x nh x nnei x nnei
        attnw = xp.where(pad_rows, zeros, attnw)
        if self.smooth:
            attnw = attnw * sw_rows * sw_cols
        # nf x nloc x nnei x nnei
        h2h2t = xp.matmul(h2, xp.matrix_transpose(h2)) / 3.0**0.5
        # nf x nloc x nh x nnei x nnei
        weighted = attnw * h2h2t[:, :, None, :, :]
        # nf x nloc x nnei x nnei x nh
        return xp_transpose_01342(weighted)

    def serialize(self) -> dict:
        """Serialize the networks to a dict.

        Returns
        -------
        dict
            The serialized networks.
        """
        serialized = {
            "@class": "Atten2Map",
            "@version": 1,
            "input_dim": self.input_dim,
            "hidden_dim": self.hidden_dim,
            "head_num": self.head_num,
            "has_gate": self.has_gate,
            "smooth": self.smooth,
            "attnw_shift": self.attnw_shift,
            "precision": self.precision,
        }
        serialized["mapqk"] = self.mapqk.serialize()
        return serialized

    @classmethod
    def deserialize(cls, data: dict) -> "Atten2Map":
        """Deserialize the networks from a dict.

        Parameters
        ----------
        data : dict
            The dict to deserialize from.
        """
        # work on a shallow copy so the caller's dict keeps all its keys
        kwargs = data.copy()
        version = kwargs.pop("@version")
        check_version_compatibility(version, 1, 1)
        kwargs.pop("@class")
        mapqk_data = kwargs.pop("mapqk")
        instance = cls(**kwargs)
        instance.mapqk = NativeLayer.deserialize(mapqk_data)
        return instance
class Atten2EquiVarApply(NativeOP):
    """Apply a multi-head attention map to the equivariant rep h2 and merge the heads."""

    def __init__(
        self,
        input_dim: int,
        head_num: int,
        precision: str = "float64",
        seed: Optional[Union[int, list[int]]] = None,
    ) -> None:
        super().__init__()
        self.input_dim = input_dim
        self.head_num = head_num
        self.precision = precision
        # bias-free layer mapping the nh head axis to a single channel
        self.head_map = NativeLayer(
            head_num, 1, bias=False, precision=precision, seed=seed
        )

    def call(
        self,
        AA: np.ndarray,  # nf x nloc x nnei x nnei x nh
        h2: np.ndarray,  # nf x nloc x nnei x 3
    ) -> np.ndarray:
        """Apply the attention map to h2.

        Parameters
        ----------
        AA
            The attention map, nf x nloc x nnei x nnei x nh.
        h2
            Pair-wise equivariant rep, nf x nloc x nnei x 3.

        Returns
        -------
        The updated equivariant rep, nf x nloc x nnei x 3.
        """
        xp = array_api_compat.array_namespace(AA, h2)
        nf, nloc, nnei, _ = h2.shape
        nh = self.head_num
        # nf x nloc x nh x nnei x nnei
        attn_per_head = xp_transpose_01423(AA)
        # replicate h2 for every head: nf x nloc x nh x nnei x 3
        h2_per_head = xp.tile(xp.expand_dims(h2, axis=2), (1, 1, nh, 1, 1))
        # nf x nloc x nh x nnei x 3
        applied = xp.matmul(attn_per_head, h2_per_head)
        # nf x nloc x nnei x 3 x nh
        applied = xp_transpose_01342(applied)
        applied = xp.reshape(applied, (nf, nloc, nnei, 3, nh))
        # merge the heads, then drop the resulting unit axis: nf x nloc x nnei x 3
        merged = self.head_map(applied)
        return xp.squeeze(merged, axis=-1)

    def serialize(self) -> dict:
        """Serialize the networks to a dict.

        Returns
        -------
        dict
            The serialized networks.
        """
        serialized = {
            "@class": "Atten2EquiVarApply",
            "@version": 1,
            "input_dim": self.input_dim,
            "head_num": self.head_num,
            "precision": self.precision,
        }
        serialized["head_map"] = self.head_map.serialize()
        return serialized

    @classmethod
    def deserialize(cls, data: dict) -> "Atten2EquiVarApply":
        """Deserialize the networks from a dict.

        Parameters
        ----------
        data : dict
            The dict to deserialize from.
        """
        # work on a shallow copy so the caller's dict keeps all its keys
        kwargs = data.copy()
        version = kwargs.pop("@version")
        check_version_compatibility(version, 1, 1)
        kwargs.pop("@class")
        head_map_data = kwargs.pop("head_map")
        instance = cls(**kwargs)
        instance.head_map = NativeLayer.deserialize(head_map_data)
        return instance
self.head_num + assert ni == g1.shape[-1] + assert ni == gg1.shape[-1] + # nf x nloc x nd x nh + g1q = self.mapq(g1) + g1q = xp.reshape(g1q, (nf, nloc, nd, nh)) + # nf x nloc x nh x nd + g1q = xp.matrix_transpose(g1q) + # nf x nloc x nnei x (nd+ni) x nh + gg1kv = self.mapkv(gg1) + gg1kv = xp.reshape(gg1kv, (nf, nloc, nnei, nd + ni, nh)) + gg1kv = xp_transpose_01423(gg1kv) + # nf x nloc x nh x nnei x nd, nf x nloc x nh x nnei x ng1 + # gg1k, gg1v = xp.split(gg1kv, [nd], axis=-1) + gg1k = gg1kv[:, :, :, :, :nd] + gg1v = gg1kv[:, :, :, :, nd:] + + # nf x nloc x nh x 1 x nnei + attnw = ( + xp.matmul(xp.expand_dims(g1q, axis=-2), xp.matrix_transpose(gg1k)) / nd**0.5 + ) + # nf x nloc x nh x nnei + attnw = xp.squeeze(attnw, axis=-2) + # mask the attenmap, nf x nloc x 1 x nnei + attnw_mask = ~xp.expand_dims(nlist_mask, axis=-2) + # nf x nloc x nh x nnei + if self.smooth: + attnw = (attnw + self.attnw_shift) * xp.expand_dims( + sw, axis=-2 + ) - self.attnw_shift + else: + attnw = xp.where(attnw_mask, xp.full_like(attnw, -xp.inf), attnw) + attnw = np_softmax(attnw, axis=-1) + attnw = xp.where(attnw_mask, xp.zeros_like(attnw), attnw) + if self.smooth: + attnw = attnw * xp.expand_dims(sw, axis=-2) + + # nf x nloc x nh x ng1 + ret = xp.matmul(xp.expand_dims(attnw, axis=-2), gg1v) + ret = xp.squeeze(ret, axis=-2) + ret = xp.reshape(ret, (nf, nloc, nh * ni)) + # nf x nloc x ng1 + ret = self.head_map(ret) + return ret + + def serialize(self) -> dict: + """Serialize the networks to a dict. + + Returns + ------- + dict + The serialized networks. 
+ """ + return { + "@class": "LocalAtten", + "@version": 1, + "input_dim": self.input_dim, + "hidden_dim": self.hidden_dim, + "head_num": self.head_num, + "smooth": self.smooth, + "attnw_shift": self.attnw_shift, + "precision": self.precision, + "mapq": self.mapq.serialize(), + "mapkv": self.mapkv.serialize(), + "head_map": self.head_map.serialize(), + } + + @classmethod + def deserialize(cls, data: dict) -> "LocalAtten": + """Deserialize the networks from a dict. + + Parameters + ---------- + data : dict + The dict to deserialize from. + """ + data = data.copy() + check_version_compatibility(data.pop("@version"), 1, 1) + data.pop("@class") + mapq = data.pop("mapq") + mapkv = data.pop("mapkv") + head_map = data.pop("head_map") + obj = cls(**data) + obj.mapq = NativeLayer.deserialize(mapq) + obj.mapkv = NativeLayer.deserialize(mapkv) + obj.head_map = NativeLayer.deserialize(head_map) + return obj + + +class RepformerLayer(NativeOP): + def __init__( + self, + rcut, + rcut_smth, + sel: int, + ntypes: int, + g1_dim=128, + g2_dim=16, + axis_neuron: int = 4, + update_chnnl_2: bool = True, + update_g1_has_conv: bool = True, + update_g1_has_drrd: bool = True, + update_g1_has_grrg: bool = True, + update_g1_has_attn: bool = True, + update_g2_has_g1g1: bool = True, + update_g2_has_attn: bool = True, + update_h2: bool = False, + attn1_hidden: int = 64, + attn1_nhead: int = 4, + attn2_hidden: int = 16, + attn2_nhead: int = 4, + attn2_has_gate: bool = False, + activation_function: str = "tanh", + update_style: str = "res_avg", + update_residual: float = 0.001, + update_residual_init: str = "norm", + smooth: bool = True, + precision: str = "float64", + trainable_ln: bool = True, + use_sqrt_nnei: bool = True, + g1_out_conv: bool = True, + g1_out_mlp: bool = True, + ln_eps: Optional[float] = 1e-5, + seed: Optional[Union[int, list[int]]] = None, + ) -> None: + super().__init__() + self.epsilon = 1e-4 # protection of 1./nnei + self.rcut = rcut + self.rcut_smth = rcut_smth + 
self.ntypes = ntypes + sel = [sel] if isinstance(sel, int) else sel + self.nnei = sum(sel) + assert len(sel) == 1 + self.sel = sel + self.sec = self.sel + self.axis_neuron = axis_neuron + self.activation_function = activation_function + self.act = get_activation_fn(self.activation_function) + self.update_g1_has_grrg = update_g1_has_grrg + self.update_g1_has_drrd = update_g1_has_drrd + self.update_g1_has_conv = update_g1_has_conv + self.update_g1_has_attn = update_g1_has_attn + self.update_chnnl_2 = update_chnnl_2 + self.update_g2_has_g1g1 = update_g2_has_g1g1 if self.update_chnnl_2 else False + self.update_g2_has_attn = update_g2_has_attn if self.update_chnnl_2 else False + self.update_h2 = update_h2 if self.update_chnnl_2 else False + del update_g2_has_g1g1, update_g2_has_attn, update_h2 + self.attn1_hidden = attn1_hidden + self.attn1_nhead = attn1_nhead + self.attn2_hidden = attn2_hidden + self.attn2_nhead = attn2_nhead + self.attn2_has_gate = attn2_has_gate + self.update_style = update_style + self.update_residual = update_residual + self.update_residual_init = update_residual_init + self.smooth = smooth + self.g1_dim = g1_dim + self.g2_dim = g2_dim + self.trainable_ln = trainable_ln + self.use_sqrt_nnei = use_sqrt_nnei + self.g1_out_conv = g1_out_conv + self.g1_out_mlp = g1_out_mlp + self.ln_eps = ln_eps + self.precision = precision + + assert update_residual_init in [ + "norm", + "const", + ], "'update_residual_init' only support 'norm' or 'const'!" 
+ self.update_residual = update_residual + self.update_residual_init = update_residual_init + g1_residual = [] + g2_residual = [] + h2_residual = [] + + if self.update_style == "res_residual": + g1_residual.append( + get_residual( + g1_dim, + self.update_residual, + self.update_residual_init, + precision=precision, + seed=child_seed(seed, 0), + ) + ) + + g1_in_dim = self.cal_1_dim(g1_dim, g2_dim, self.axis_neuron) + self.linear1 = NativeLayer( + g1_in_dim, + g1_dim, + precision=precision, + seed=child_seed(seed, 1), + ) + self.linear2 = None + self.proj_g1g2 = None + self.proj_g1g1g2 = None + self.attn2g_map = None + self.attn2_mh_apply = None + self.attn2_lm = None + self.attn2_ev_apply = None + self.loc_attn = None + + if self.update_chnnl_2: + self.linear2 = NativeLayer( + g2_dim, + g2_dim, + precision=precision, + seed=child_seed(seed, 2), + ) + if self.update_style == "res_residual": + g2_residual.append( + get_residual( + g2_dim, + self.update_residual, + self.update_residual_init, + precision=precision, + seed=child_seed(seed, 3), + ) + ) + if self.g1_out_mlp: + self.g1_self_mlp = NativeLayer( + g1_dim, + g1_dim, + precision=precision, + seed=child_seed(seed, 15), + ) + if self.update_style == "res_residual": + g1_residual.append( + get_residual( + g1_dim, + self.update_residual, + self.update_residual_init, + precision=precision, + seed=child_seed(seed, 16), + ) + ) + else: + self.g1_self_mlp = None + if self.update_g1_has_conv: + if not self.g1_out_conv: + self.proj_g1g2 = NativeLayer( + g1_dim, + g2_dim, + bias=False, + precision=precision, + seed=child_seed(seed, 4), + ) + else: + self.proj_g1g2 = NativeLayer( + g2_dim, + g1_dim, + bias=False, + precision=precision, + seed=child_seed(seed, 4), + ) + if self.update_style == "res_residual": + g1_residual.append( + get_residual( + g1_dim, + self.update_residual, + self.update_residual_init, + precision=precision, + seed=child_seed(seed, 17), + ) + ) + if self.update_g2_has_g1g1: + self.proj_g1g1g2 = 
NativeLayer( + g1_dim, + g2_dim, + bias=False, + precision=precision, + seed=child_seed(seed, 5), + ) + if self.update_style == "res_residual": + g2_residual.append( + get_residual( + g2_dim, + self.update_residual, + self.update_residual_init, + precision=precision, + seed=child_seed(seed, 6), + ) + ) + if self.update_g2_has_attn or self.update_h2: + self.attn2g_map = Atten2Map( + g2_dim, + attn2_hidden, + attn2_nhead, + attn2_has_gate, + self.smooth, + precision=precision, + seed=child_seed(seed, 7), + ) + if self.update_g2_has_attn: + self.attn2_mh_apply = Atten2MultiHeadApply( + g2_dim, attn2_nhead, precision=precision, seed=child_seed(seed, 8) + ) + self.attn2_lm = LayerNorm( + g2_dim, + eps=ln_eps, + trainable=trainable_ln, + precision=precision, + seed=child_seed(seed, 9), + ) + if self.update_style == "res_residual": + g2_residual.append( + get_residual( + g2_dim, + self.update_residual, + self.update_residual_init, + precision=precision, + seed=child_seed(seed, 10), + ) + ) + + if self.update_h2: + self.attn2_ev_apply = Atten2EquiVarApply( + g2_dim, attn2_nhead, precision=precision, seed=child_seed(seed, 11) + ) + if self.update_style == "res_residual": + h2_residual.append( + get_residual( + 1, + self.update_residual, + self.update_residual_init, + precision=precision, + seed=child_seed(seed, 12), + ) + ) + if self.update_g1_has_attn: + self.loc_attn = LocalAtten( + g1_dim, + attn1_hidden, + attn1_nhead, + self.smooth, + precision=precision, + seed=child_seed(seed, 13), + ) + if self.update_style == "res_residual": + g1_residual.append( + get_residual( + g1_dim, + self.update_residual, + self.update_residual_init, + precision=precision, + seed=child_seed(seed, 14), + ) + ) + + self.g1_residual = g1_residual + self.g2_residual = g2_residual + self.h2_residual = h2_residual + + def cal_1_dim(self, g1d: int, g2d: int, ax: int) -> int: + ret = g1d if not self.g1_out_mlp else 0 + if self.update_g1_has_grrg: + ret += g2d * ax + if self.update_g1_has_drrd: + ret 
+= g1d * ax + if self.update_g1_has_conv and not self.g1_out_conv: + ret += g2d + return ret + + def _update_h2( + self, + h2: np.ndarray, + attn: np.ndarray, + ) -> np.ndarray: + """ + Calculate the attention weights update for pair-wise equivariant rep. + + Parameters + ---------- + h2 + Pair-wise equivariant rep tensors, with shape nf x nloc x nnei x 3. + attn + Attention weights from g2 attention, with shape nf x nloc x nnei x nnei x nh2. + """ + assert self.attn2_ev_apply is not None + # nf x nloc x nnei x nh2 + h2_1 = self.attn2_ev_apply(attn, h2) + return h2_1 + + def _update_g1_conv( + self, + gg1: np.ndarray, + g2: np.ndarray, + nlist_mask: np.ndarray, + sw: np.ndarray, + ) -> np.ndarray: + """ + Calculate the convolution update for atomic invariant rep. + + Parameters + ---------- + gg1 + Neighbor-wise atomic invariant rep, with shape nf x nloc x nnei x ng1. + g2 + Pair invariant rep, with shape nf x nloc x nnei x ng2. + nlist_mask + Neighbor list mask, where zero means no neighbor, with shape nf x nloc x nnei. + sw + The switch function, which equals 1 within the rcut_smth range, smoothly decays from 1 to 0 between rcut_smth and rcut, + and remains 0 beyond rcut, with shape nf x nloc x nnei. 
+ """ + xp = array_api_compat.array_namespace(gg1, g2, nlist_mask, sw) + assert self.proj_g1g2 is not None + nf, nloc, nnei, _ = g2.shape + ng1 = gg1.shape[-1] + ng2 = g2.shape[-1] + if not self.g1_out_conv: + # gg1 : nf x nloc x nnei x ng2 + gg1 = self.proj_g1g2(gg1) + gg1 = xp.reshape(gg1, (nf, nloc, nnei, ng2)) + else: + # gg1 : nf x nloc x nnei x ng1 + gg1 = xp.reshape(gg1, (nf, nloc, nnei, ng1)) + # nf x nloc x nnei x ng2/ng1 + gg1 = _apply_nlist_mask(gg1, nlist_mask) + if not self.smooth: + # normalized by number of neighbors, not smooth + # nf x nloc + invnnei = 1.0 / ( + self.epsilon + xp.sum(xp.astype(nlist_mask, gg1.dtype), axis=-1) + ) + # nf x nloc x 1 + invnnei = invnnei[:, :, xp.newaxis] + else: + gg1 = _apply_switch(gg1, sw) + invnnei = (1.0 / float(nnei)) * xp.ones((nf, nloc, 1), dtype=gg1.dtype) + if not self.g1_out_conv: + # nf x nloc x ng2 + g1_11 = xp.sum(g2 * gg1, axis=2) * invnnei + else: + # nf x nloc x ng1 + g2 = self.proj_g1g2(g2) + g2 = xp.reshape(g2, (nf, nloc, nnei, ng1)) + # nb x nloc x ng1 + g1_11 = xp.sum(g2 * gg1, axis=2) * invnnei + return g1_11 + + def _update_g2_g1g1( + self, + g1: np.ndarray, # nf x nloc x ng1 + gg1: np.ndarray, # nf x nloc x nnei x ng1 + nlist_mask: np.ndarray, # nf x nloc x nnei + sw: np.ndarray, # nf x nloc x nnei + ) -> np.ndarray: + """ + Update the g2 using element-wise dot g1_i * g1_j. + + Parameters + ---------- + g1 + Atomic invariant rep, with shape nf x nloc x ng1. + gg1 + Neighbor-wise atomic invariant rep, with shape nf x nloc x nnei x ng1. + nlist_mask + Neighbor list mask, where zero means no neighbor, with shape nf x nloc x nnei. + sw + The switch function, which equals 1 within the rcut_smth range, smoothly decays from 1 to 0 between rcut_smth and rcut, + and remains 0 beyond rcut, with shape nf x nloc x nnei. 
+ """ + xp = array_api_compat.array_namespace(g1, gg1, nlist_mask, sw) + ret = xp.expand_dims(g1, axis=-2) * gg1 + # nf x nloc x nnei x ng1 + ret = _apply_nlist_mask(ret, nlist_mask) + if self.smooth: + ret = _apply_switch(ret, sw) + return ret + + def call( + self, + g1_ext: np.ndarray, # nf x nall x ng1 + g2: np.ndarray, # nf x nloc x nnei x ng2 + h2: np.ndarray, # nf x nloc x nnei x 3 + nlist: np.ndarray, # nf x nloc x nnei + nlist_mask: np.ndarray, # nf x nloc x nnei + sw: np.ndarray, # switch func, nf x nloc x nnei + ): + """ + Parameters + ---------- + g1_ext : nf x nall x ng1 extended single-atom channel + g2 : nf x nloc x nnei x ng2 pair-atom channel, invariant + h2 : nf x nloc x nnei x 3 pair-atom channel, equivariant + nlist : nf x nloc x nnei neighbor list (padded neis are set to 0) + nlist_mask : nf x nloc x nnei masks of the neighbor list. real nei 1 otherwise 0 + sw : nf x nloc x nnei switch function + + Returns + ------- + g1: nf x nloc x ng1 updated single-atom channel + g2: nf x nloc x nnei x ng2 updated pair-atom channel, invariant + h2: nf x nloc x nnei x 3 updated pair-atom channel, equivariant + """ + xp = array_api_compat.array_namespace(g1_ext, g2, h2, nlist, nlist_mask, sw) + cal_gg1 = ( + self.update_g1_has_drrd + or self.update_g1_has_conv + or self.update_g1_has_attn + or self.update_g2_has_g1g1 + ) + + nf, nloc, nnei, _ = g2.shape + nall = g1_ext.shape[1] + # g1, _ = xp.split(g1_ext, [nloc], axis=1) + g1 = g1_ext[:, :nloc, :] + assert (nf, nloc) == g1.shape[:2] + assert (nf, nloc, nnei) == h2.shape[:3] + + g2_update: list[np.ndarray] = [g2] + h2_update: list[np.ndarray] = [h2] + g1_update: list[np.ndarray] = [g1] + g1_mlp: list[np.ndarray] = [g1] if not self.g1_out_mlp else [] + if self.g1_out_mlp: + assert self.g1_self_mlp is not None + g1_self_mlp = self.act(self.g1_self_mlp(g1)) + g1_update.append(g1_self_mlp) + + if cal_gg1: + gg1 = _make_nei_g1(g1_ext, nlist) + else: + gg1 = None + + if self.update_chnnl_2: + # mlp(g2) + assert 
self.linear2 is not None + # nf x nloc x nnei x ng2 + g2_1 = self.act(self.linear2(g2)) + g2_update.append(g2_1) + + if self.update_g2_has_g1g1: + # linear(g1_i * g1_j) + assert gg1 is not None + assert self.proj_g1g1g2 is not None + g2_update.append( + self.proj_g1g1g2(self._update_g2_g1g1(g1, gg1, nlist_mask, sw)) + ) + + if self.update_g2_has_attn or self.update_h2: + # gated_attention(g2, h2) + assert self.attn2g_map is not None + # nf x nloc x nnei x nnei x nh + AAg = self.attn2g_map(g2, h2, nlist_mask, sw) + + if self.update_g2_has_attn: + assert self.attn2_mh_apply is not None + assert self.attn2_lm is not None + # nf x nloc x nnei x ng2 + g2_2 = self.attn2_mh_apply(AAg, g2) + g2_2 = self.attn2_lm(g2_2) + g2_update.append(g2_2) + + if self.update_h2: + # linear_head(attention_weights * h2) + h2_update.append(self._update_h2(h2, AAg)) + + if self.update_g1_has_conv: + assert gg1 is not None + g1_conv = self._update_g1_conv(gg1, g2, nlist_mask, sw) + if not self.g1_out_conv: + g1_mlp.append(g1_conv) + else: + g1_update.append(g1_conv) + + if self.update_g1_has_grrg: + g1_mlp.append( + symmetrization_op( + g2, + h2, + nlist_mask, + sw, + self.axis_neuron, + smooth=self.smooth, + epsilon=self.epsilon, + use_sqrt_nnei=self.use_sqrt_nnei, + ) + ) + + if self.update_g1_has_drrd: + assert gg1 is not None + g1_mlp.append( + symmetrization_op( + gg1, + h2, + nlist_mask, + sw, + self.axis_neuron, + smooth=self.smooth, + epsilon=self.epsilon, + use_sqrt_nnei=self.use_sqrt_nnei, + ) + ) + + # nf x nloc x [ng1+ng2+(axisxng2)+(axisxng1)] + # conv grrg drrd + g1_1 = self.act(self.linear1(xp.concat(g1_mlp, axis=-1))) + g1_update.append(g1_1) + + if self.update_g1_has_attn: + assert gg1 is not None + assert self.loc_attn is not None + g1_update.append(self.loc_attn(g1, gg1, nlist_mask, sw)) + + # update + if self.update_chnnl_2: + g2_new = self.list_update(g2_update, "g2") + h2_new = self.list_update(h2_update, "h2") + else: + g2_new, h2_new = g2, h2 + g1_new = 
self.list_update(g1_update, "g1") + return g1_new, g2_new, h2_new + + def list_update_res_avg( + self, + update_list: list[np.ndarray], + ) -> np.ndarray: + nitem = len(update_list) + uu = update_list[0] + for ii in range(1, nitem): + uu = uu + update_list[ii] + return uu / (float(nitem) ** 0.5) + + def list_update_res_incr(self, update_list: list[np.ndarray]) -> np.ndarray: + nitem = len(update_list) + uu = update_list[0] + scale = 1.0 / (float(nitem - 1) ** 0.5) if nitem > 1 else 0.0 + for ii in range(1, nitem): + uu = uu + scale * update_list[ii] + return uu + + def list_update_res_residual( + self, update_list: list[np.ndarray], update_name: str = "g1" + ) -> np.ndarray: + nitem = len(update_list) + uu = update_list[0] + if update_name == "g1": + for ii, vv in enumerate(self.g1_residual): + uu = uu + vv * update_list[ii + 1] + elif update_name == "g2": + for ii, vv in enumerate(self.g2_residual): + uu = uu + vv * update_list[ii + 1] + elif update_name == "h2": + for ii, vv in enumerate(self.h2_residual): + uu = uu + vv * update_list[ii + 1] + else: + raise NotImplementedError + return uu + + def list_update( + self, update_list: list[np.ndarray], update_name: str = "g1" + ) -> np.ndarray: + if self.update_style == "res_avg": + return self.list_update_res_avg(update_list) + elif self.update_style == "res_incr": + return self.list_update_res_incr(update_list) + elif self.update_style == "res_residual": + return self.list_update_res_residual(update_list, update_name=update_name) + else: + raise RuntimeError(f"unknown update style {self.update_style}") + + def serialize(self) -> dict: + """Serialize the networks to a dict. + + Returns + ------- + dict + The serialized networks. 
+ """ + data = { + "@class": "RepformerLayer", + "@version": 2, + "rcut": self.rcut, + "rcut_smth": self.rcut_smth, + "sel": self.sel, + "ntypes": self.ntypes, + "g1_dim": self.g1_dim, + "g2_dim": self.g2_dim, + "axis_neuron": self.axis_neuron, + "update_chnnl_2": self.update_chnnl_2, + "update_g1_has_conv": self.update_g1_has_conv, + "update_g1_has_drrd": self.update_g1_has_drrd, + "update_g1_has_grrg": self.update_g1_has_grrg, + "update_g1_has_attn": self.update_g1_has_attn, + "update_g2_has_g1g1": self.update_g2_has_g1g1, + "update_g2_has_attn": self.update_g2_has_attn, + "update_h2": self.update_h2, + "attn1_hidden": self.attn1_hidden, + "attn1_nhead": self.attn1_nhead, + "attn2_hidden": self.attn2_hidden, + "attn2_nhead": self.attn2_nhead, + "attn2_has_gate": self.attn2_has_gate, + "activation_function": self.activation_function, + "update_style": self.update_style, + "smooth": self.smooth, + "precision": self.precision, + "trainable_ln": self.trainable_ln, + "use_sqrt_nnei": self.use_sqrt_nnei, + "g1_out_conv": self.g1_out_conv, + "g1_out_mlp": self.g1_out_mlp, + "ln_eps": self.ln_eps, + "linear1": self.linear1.serialize(), + } + if self.update_chnnl_2: + data.update( + { + "linear2": self.linear2.serialize(), + } + ) + if self.update_g1_has_conv: + data.update( + { + "proj_g1g2": self.proj_g1g2.serialize(), + } + ) + if self.update_g2_has_g1g1: + data.update( + { + "proj_g1g1g2": self.proj_g1g1g2.serialize(), + } + ) + if self.update_g2_has_attn or self.update_h2: + data.update( + { + "attn2g_map": self.attn2g_map.serialize(), + } + ) + if self.update_g2_has_attn: + data.update( + { + "attn2_mh_apply": self.attn2_mh_apply.serialize(), + "attn2_lm": self.attn2_lm.serialize(), + } + ) + + if self.update_h2: + data.update( + { + "attn2_ev_apply": self.attn2_ev_apply.serialize(), + } + ) + if self.update_g1_has_attn: + data.update( + { + "loc_attn": self.loc_attn.serialize(), + } + ) + if self.g1_out_mlp: + data.update( + { + "g1_self_mlp": 
self.g1_self_mlp.serialize(), + } + ) + if self.update_style == "res_residual": + data.update( + { + "@variables": { + "g1_residual": [to_numpy_array(aa) for aa in self.g1_residual], + "g2_residual": [to_numpy_array(aa) for aa in self.g2_residual], + "h2_residual": [to_numpy_array(aa) for aa in self.h2_residual], + } + } + ) + return data + + @classmethod + def deserialize(cls, data: dict) -> "RepformerLayer": + """Deserialize the networks from a dict. + + Parameters + ---------- + data : dict + The dict to deserialize from. + """ + data = data.copy() + check_version_compatibility(data.pop("@version"), 2, 1) + data.pop("@class") + linear1 = data.pop("linear1") + update_chnnl_2 = data["update_chnnl_2"] + update_g1_has_conv = data["update_g1_has_conv"] + update_g2_has_g1g1 = data["update_g2_has_g1g1"] + update_g2_has_attn = data["update_g2_has_attn"] + update_h2 = data["update_h2"] + update_g1_has_attn = data["update_g1_has_attn"] + update_style = data["update_style"] + g1_out_mlp = data["g1_out_mlp"] + + linear2 = data.pop("linear2", None) + proj_g1g2 = data.pop("proj_g1g2", None) + proj_g1g1g2 = data.pop("proj_g1g1g2", None) + attn2g_map = data.pop("attn2g_map", None) + attn2_mh_apply = data.pop("attn2_mh_apply", None) + attn2_lm = data.pop("attn2_lm", None) + attn2_ev_apply = data.pop("attn2_ev_apply", None) + loc_attn = data.pop("loc_attn", None) + g1_self_mlp = data.pop("g1_self_mlp", None) + variables = data.pop("@variables", {}) + g1_residual = variables.get("g1_residual", data.pop("g1_residual", [])) + g2_residual = variables.get("g2_residual", data.pop("g2_residual", [])) + h2_residual = variables.get("h2_residual", data.pop("h2_residual", [])) + + obj = cls(**data) + obj.linear1 = NativeLayer.deserialize(linear1) + if update_chnnl_2: + assert isinstance(linear2, dict) + obj.linear2 = NativeLayer.deserialize(linear2) + if update_g1_has_conv: + assert isinstance(proj_g1g2, dict) + obj.proj_g1g2 = NativeLayer.deserialize(proj_g1g2) + if update_g2_has_g1g1: + 
assert isinstance(proj_g1g1g2, dict) + obj.proj_g1g1g2 = NativeLayer.deserialize(proj_g1g1g2) + if update_g2_has_attn or update_h2: + assert isinstance(attn2g_map, dict) + obj.attn2g_map = Atten2Map.deserialize(attn2g_map) + if update_g2_has_attn: + assert isinstance(attn2_mh_apply, dict) + assert isinstance(attn2_lm, dict) + obj.attn2_mh_apply = Atten2MultiHeadApply.deserialize(attn2_mh_apply) + obj.attn2_lm = LayerNorm.deserialize(attn2_lm) + if update_h2: + assert isinstance(attn2_ev_apply, dict) + obj.attn2_ev_apply = Atten2EquiVarApply.deserialize(attn2_ev_apply) + if update_g1_has_attn: + assert isinstance(loc_attn, dict) + obj.loc_attn = LocalAtten.deserialize(loc_attn) + if g1_out_mlp: + assert isinstance(g1_self_mlp, dict) + obj.g1_self_mlp = NativeLayer.deserialize(g1_self_mlp) + if update_style == "res_residual": + obj.g1_residual = g1_residual + obj.g2_residual = g2_residual + obj.h2_residual = h2_residual + return obj diff --git a/deepmd/dpmodel/descriptor/se_atten_v2.py b/deepmd/dpmodel/descriptor/se_atten_v2.py new file mode 100644 index 0000000000..897863ec0f --- /dev/null +++ b/deepmd/dpmodel/descriptor/se_atten_v2.py @@ -0,0 +1,187 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Any, + Optional, + Union, +) + +import numpy as np + +from deepmd.dpmodel import ( + DEFAULT_PRECISION, + PRECISION_DICT, +) +from deepmd.dpmodel.common import ( + to_numpy_array, +) +from deepmd.dpmodel.utils import ( + NetworkCollection, +) +from deepmd.dpmodel.utils.type_embed import ( + TypeEmbedNet, +) +from deepmd.utils.version import ( + check_version_compatibility, +) + +from .base_descriptor import ( + BaseDescriptor, +) +from .dpa1 import ( + DescrptDPA1, + NeighborGatedAttention, +) + + +@BaseDescriptor.register("se_atten_v2") +class DescrptSeAttenV2(DescrptDPA1): + def __init__( + self, + rcut: float, + rcut_smth: float, + sel: Union[list[int], int], + ntypes: int, + neuron: list[int] = [25, 50, 100], + axis_neuron: int = 8, + tebd_dim: 
int = 8, + resnet_dt: bool = False, + trainable: bool = True, + type_one_side: bool = False, + attn: int = 128, + attn_layer: int = 2, + attn_dotr: bool = True, + attn_mask: bool = False, + exclude_types: list[tuple[int, int]] = [], + env_protection: float = 0.0, + set_davg_zero: bool = False, + activation_function: str = "tanh", + precision: str = DEFAULT_PRECISION, + scaling_factor=1.0, + normalize: bool = True, + temperature: Optional[float] = None, + trainable_ln: bool = True, + ln_eps: Optional[float] = 1e-5, + concat_output_tebd: bool = True, + spin: Optional[Any] = None, + stripped_type_embedding: Optional[bool] = None, + use_econf_tebd: bool = False, + use_tebd_bias: bool = False, + type_map: Optional[list[str]] = None, + # consistent with argcheck, not used though + seed: Optional[Union[int, list[int]]] = None, + ) -> None: + DescrptDPA1.__init__( + self, + rcut, + rcut_smth, + sel, + ntypes, + neuron=neuron, + axis_neuron=axis_neuron, + tebd_dim=tebd_dim, + tebd_input_mode="strip", + resnet_dt=resnet_dt, + trainable=trainable, + type_one_side=type_one_side, + attn=attn, + attn_layer=attn_layer, + attn_dotr=attn_dotr, + attn_mask=attn_mask, + exclude_types=exclude_types, + env_protection=env_protection, + set_davg_zero=set_davg_zero, + activation_function=activation_function, + precision=precision, + scaling_factor=scaling_factor, + normalize=normalize, + temperature=temperature, + trainable_ln=trainable_ln, + ln_eps=ln_eps, + smooth_type_embedding=True, + concat_output_tebd=concat_output_tebd, + spin=spin, + stripped_type_embedding=stripped_type_embedding, + use_econf_tebd=use_econf_tebd, + use_tebd_bias=use_tebd_bias, + type_map=type_map, + # consistent with argcheck, not used though + seed=seed, + ) + + def serialize(self) -> dict: + """Serialize the descriptor to dict.""" + obj = self.se_atten + data = { + "@class": "Descriptor", + "type": "se_atten_v2", + "@version": 2, + "rcut": obj.rcut, + "rcut_smth": obj.rcut_smth, + "sel": obj.sel, + "ntypes": 
obj.ntypes, + "neuron": obj.neuron, + "axis_neuron": obj.axis_neuron, + "tebd_dim": obj.tebd_dim, + "set_davg_zero": obj.set_davg_zero, + "attn": obj.attn, + "attn_layer": obj.attn_layer, + "attn_dotr": obj.attn_dotr, + "attn_mask": False, + "activation_function": obj.activation_function, + "resnet_dt": obj.resnet_dt, + "scaling_factor": obj.scaling_factor, + "normalize": obj.normalize, + "temperature": obj.temperature, + "trainable_ln": obj.trainable_ln, + "ln_eps": obj.ln_eps, + "type_one_side": obj.type_one_side, + "concat_output_tebd": self.concat_output_tebd, + "use_econf_tebd": self.use_econf_tebd, + "use_tebd_bias": self.use_tebd_bias, + "type_map": self.type_map, + # make deterministic + "precision": np.dtype(PRECISION_DICT[obj.precision]).name, + "embeddings": obj.embeddings.serialize(), + "embeddings_strip": obj.embeddings_strip.serialize(), + "attention_layers": obj.dpa1_attention.serialize(), + "env_mat": obj.env_mat.serialize(), + "type_embedding": self.type_embedding.serialize(), + "exclude_types": obj.exclude_types, + "env_protection": obj.env_protection, + "@variables": { + "davg": to_numpy_array(obj["davg"]), + "dstd": to_numpy_array(obj["dstd"]), + }, + ## to be updated when the options are supported. 
+ "trainable": self.trainable, + "spin": None, + } + return data + + @classmethod + def deserialize(cls, data: dict) -> "DescrptSeAttenV2": + """Deserialize from dict.""" + data = data.copy() + check_version_compatibility(data.pop("@version"), 2, 1) + data.pop("@class") + data.pop("type") + variables = data.pop("@variables") + embeddings = data.pop("embeddings") + type_embedding = data.pop("type_embedding") + attention_layers = data.pop("attention_layers") + data.pop("env_mat") + embeddings_strip = data.pop("embeddings_strip") + # compat with version 1 + if "use_tebd_bias" not in data: + data["use_tebd_bias"] = True + obj = cls(**data) + + obj.se_atten["davg"] = variables["davg"] + obj.se_atten["dstd"] = variables["dstd"] + obj.se_atten.embeddings = NetworkCollection.deserialize(embeddings) + obj.se_atten.embeddings_strip = NetworkCollection.deserialize(embeddings_strip) + obj.type_embedding = TypeEmbedNet.deserialize(type_embedding) + obj.se_atten.dpa1_attention = NeighborGatedAttention.deserialize( + attention_layers + ) + return obj diff --git a/deepmd/dpmodel/descriptor/se_e2_a.py b/deepmd/dpmodel/descriptor/se_e2_a.py new file mode 100644 index 0000000000..598d5c5fcc --- /dev/null +++ b/deepmd/dpmodel/descriptor/se_e2_a.py @@ -0,0 +1,590 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import itertools +from typing import ( + Any, + NoReturn, + Optional, + Union, +) + +import array_api_compat +import numpy as np + +from deepmd.dpmodel import ( + DEFAULT_PRECISION, + PRECISION_DICT, + NativeOP, +) +from deepmd.dpmodel.common import ( + cast_precision, + to_numpy_array, +) +from deepmd.dpmodel.utils import ( + EmbeddingNet, + EnvMat, + NetworkCollection, + PairExcludeMask, +) +from deepmd.dpmodel.utils.seed import ( + child_seed, +) +from deepmd.dpmodel.utils.update_sel import ( + UpdateSel, +) +from deepmd.utils.data_system import ( + DeepmdDataSystem, +) +from deepmd.utils.path import ( + DPPath, +) +from deepmd.utils.version import ( + 
check_version_compatibility, +) + +from .base_descriptor import ( + BaseDescriptor, +) + + +@BaseDescriptor.register("se_e2_a") +@BaseDescriptor.register("se_a") +class DescrptSeA(NativeOP, BaseDescriptor): + r"""DeepPot-SE constructed from all information (both angular and radial) of + atomic configurations. The embedding takes the distance between atoms as input. + + The descriptor :math:`\mathcal{D}^i \in \mathcal{R}^{M_1 \times M_2}` is given by [1]_ + + .. math:: + \mathcal{D}^i = (\mathcal{G}^i)^T \mathcal{R}^i (\mathcal{R}^i)^T \mathcal{G}^i_< + + where :math:`\mathcal{R}^i \in \mathbb{R}^{N \times 4}` is the coordinate + matrix, and each row of :math:`\mathcal{R}^i` can be constructed as follows + + .. math:: + (\mathcal{R}^i)_j = [ + \begin{array}{c} + s(r_{ji}) & \frac{s(r_{ji})x_{ji}}{r_{ji}} & \frac{s(r_{ji})y_{ji}}{r_{ji}} & \frac{s(r_{ji})z_{ji}}{r_{ji}} + \end{array} + ] + + where :math:`\mathbf{R}_{ji}=\mathbf{R}_j-\mathbf{R}_i = (x_{ji}, y_{ji}, z_{ji})` is + the relative coordinate and :math:`r_{ji}=\lVert \mathbf{R}_{ji} \lVert` is its norm. + The switching function :math:`s(r)` is defined as: + + .. math:: + s(r)= + \begin{cases} + \frac{1}{r}, & r