From 7473f3e5401a5c458745a0fb5e0e4e7d73c605b1 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Mon, 18 Dec 2023 16:03:17 -0600 Subject: [PATCH 01/19] Add back build_hermes CI --- .github/workflows/main.yml | 8 ++++---- ci/build_hermes.sh | 7 ++++++- test/unit/pipelines/test_ior.yaml | 10 +++++++--- 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index b428bf3cc..11fb9bef9 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -71,10 +71,10 @@ jobs: if: steps.spack-cache.outputs.cache-hit != 'true' run: ci/install_deps.sh -# - name: Build -# if: steps.hermes-cache.outputs.cache-hit != 'true' -# run: ci/build_hermes.sh -# + - name: Build and Test + # if: steps.hermes-cache.outputs.cache-hit != 'true' + run: ci/build_hermes.sh + # - name: Test # run: bash ci/test_hermes.sh # diff --git a/ci/build_hermes.sh b/ci/build_hermes.sh index a13d3c167..52194652f 100755 --- a/ci/build_hermes.sh +++ b/ci/build_hermes.sh @@ -18,7 +18,12 @@ cd build spack load hermes_shm cmake ../ \ -DCMAKE_BUILD_TYPE=Debug \ --DCMAKE_INSTALL_PREFIX="${HOME}/install" +-DCMAKE_INSTALL_PREFIX="${HOME}/install" \ +-DHERMES_ENABLE_MPIIO_ADAPTER=ON \ +-DHERMES_MPICH=ON \ +-DHERMES_ENABLE_SDTIO_ADAPTER=ON \ +-DHERMES_ENABLE_POSIX_ADAPTER=ON \ +-DHERMES_ENABLE_COVERAGE=ON make -j8 make install diff --git a/test/unit/pipelines/test_ior.yaml b/test/unit/pipelines/test_ior.yaml index d84366214..fc60c7858 100644 --- a/test/unit/pipelines/test_ior.yaml +++ b/test/unit/pipelines/test_ior.yaml @@ -1,14 +1,18 @@ name: hermes_unit_ior env: hermes pkgs: - - pkg_type: asan - pkg_name: asan - pkg_type: hermes_run pkg_name: hermes_run + include: /tmp/test_hermes sleep: 5 + ram: 1g - pkg_type: hermes_api pkg_name: hermes_api posix: true - pkg_type: ior pkg_name: ior - api: posix \ No newline at end of file + api: posix + out: /tmp/test_hermes/ior.bin + xfer: 1m + block: 32g + nprocs: 4 \ No newline at end of file From c9db835d15d61608665acfeab46044cb2b2e8104 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Mon, 18 Dec 2023 16:05:27 -0600 Subject: [PATCH 02/19] Update submodules --- ci/build_hermes.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/build_hermes.sh b/ci/build_hermes.sh index 52194652f..574bb48d8 100755 --- a/ci/build_hermes.sh +++ b/ci/build_hermes.sh @@ -2,6 +2,7 @@ # CD into git workspace cd ${GITHUB_WORKSPACE} +git submodule update --init set -x set -e From dc9dbb64039a7bb2836aa8a3a9c99d11f3eaae19 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Mon, 18 Dec 2023 16:07:32 -0600 Subject: [PATCH 03/19] STDIO enable ON --- ci/build_hermes.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/build_hermes.sh b/ci/build_hermes.sh index 574bb48d8..bb6e4eac0 100755 --- a/ci/build_hermes.sh +++ b/ci/build_hermes.sh @@ -22,7 +22,7 @@ cmake ../ \ -DCMAKE_INSTALL_PREFIX="${HOME}/install" \ -DHERMES_ENABLE_MPIIO_ADAPTER=ON \ -DHERMES_MPICH=ON \ --DHERMES_ENABLE_SDTIO_ADAPTER=ON \ +-DHERMES_ENABLE_STDIO_ADAPTER=ON \ -DHERMES_ENABLE_POSIX_ADAPTER=ON \ -DHERMES_ENABLE_COVERAGE=ON make -j8 From 816ad9eaeb52702d8b44aa7e103892abf1f3ba6c Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Mon, 18 Dec 2023 16:10:34 -0600 Subject: [PATCH 04/19] Add jarvis to deps --- ci/build_hermes.sh | 8 ++++++++ ci/install_deps.sh | 5 ----- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/ci/build_hermes.sh b/ci/build_hermes.sh index bb6e4eac0..f3246e8ca 100755 --- a/ci/build_hermes.sh +++ b/ci/build_hermes.sh @@ -13,10 +13,18 @@ INSTALL_DIR="${HOME}" SPACK_DIR=${INSTALL_DIR}/spack . ${SPACK_DIR}/share/spack/setup-env.sh +# Load hermes_shm mkdir -p "${HOME}/install" mkdir build cd build spack load hermes_shm + +# Install jarvis-cd +git clone https://github.com/grc-iit/jarvis-cd.git +cd jarvis-cd +pip install -e . -r requirements.txt + +# Build Hermes cmake ../ \ -DCMAKE_BUILD_TYPE=Debug \ -DCMAKE_INSTALL_PREFIX="${HOME}/install" \ diff --git a/ci/install_deps.sh b/ci/install_deps.sh index ae79ee683..0fac0a6f7 100755 --- a/ci/install_deps.sh +++ b/ci/install_deps.sh @@ -29,11 +29,6 @@ set +x . ${SPACK_DIR}/share/spack/setup-env.sh set -x -# Install jarvis-cd -git clone https://github.com/grc-iit/jarvis-cd.git -cd jarvis-cd -pip install -e . -r requirements.txt - # This will allow Spack to skip building some packages that are directly # available from the system. For example, autoconf, cmake, m4, etc. # Modify ci/pckages.yaml to skip building compilers or build tools via Spack. From f02f2ca87a2ae0efd46be9ad7a5d520969e2d854 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Mon, 18 Dec 2023 19:49:11 -0600 Subject: [PATCH 05/19] Use pushd and popd --- ci/build_hermes.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ci/build_hermes.sh b/ci/build_hermes.sh index f3246e8ca..631a2c7e7 100755 --- a/ci/build_hermes.sh +++ b/ci/build_hermes.sh @@ -21,8 +21,9 @@ spack load hermes_shm # Install jarvis-cd git clone https://github.com/grc-iit/jarvis-cd.git -cd jarvis-cd +pushd jarvis-cd pip install -e . -r requirements.txt +popd # Build Hermes cmake ../ \ From fad94eb77eae160446d4faf0a01332d2e34a338a Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Tue, 19 Dec 2023 01:22:33 -0600 Subject: [PATCH 06/19] Beginning docker build --- README.md | 7 ++++ ci/build_hermes.sh | 4 ++ ci/hermes/packages/hermes_shm/package.py | 2 +- docker/deps.Dockerfile | 40 ++++++++++++++++++++ tasks/bdev/include/bdev/bdev_tasks.h | 14 +++++-- tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc | 2 +- 6 files changed, 64 insertions(+), 5 deletions(-) create mode 100644 docker/deps.Dockerfile diff --git a/README.md b/README.md index 4bdeec1cc..727eb9e93 100644 --- a/README.md +++ b/README.md @@ -53,3 +53,10 @@ make install ## Contributing We follow the [Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html). You can run `make lint` to ensure that your code conforms to the style. This requires the `cpplint` Python module (`pip install cpplint`). Alternatively, you can let the CI build inform you of required style changes. + +## Docker + +``` +sudo docker build -t hermes_deps ${HOME}/Documents/Projects/PhD/hermes -f docker/deps.Dockerfile +sudo docker run -it --name hermes_deps_c --network host hermes_deps +``` \ No newline at end of file diff --git a/ci/build_hermes.sh b/ci/build_hermes.sh index 631a2c7e7..5d123f842 100755 --- a/ci/build_hermes.sh +++ b/ci/build_hermes.sh @@ -8,6 +8,10 @@ set -x set -e set -o pipefail +# Download from dockerhub +docker pull lukemartinlogan/hermes_deps:latest +docker run lukemartinlogan/hermes_deps:latest + # Set spack env INSTALL_DIR="${HOME}" SPACK_DIR=${INSTALL_DIR}/spack diff --git a/ci/hermes/packages/hermes_shm/package.py b/ci/hermes/packages/hermes_shm/package.py index f2fee6b6d..1749fb515 100644 --- a/ci/hermes/packages/hermes_shm/package.py +++ b/ci/hermes/packages/hermes_shm/package.py @@ -24,7 +24,7 @@ class HermesShm(CMakePackage): depends_on('cereal') depends_on('yaml-cpp') depends_on('libaio') - depends_on('doxygen@1.9.3') + depends_on('doxygen') # @1.9.3 depends_on('boost@1.7: +context +fiber +filesystem +system +atomic +chrono +serialization +signals +pic +regex') depends_on('libfabric fabrics=sockets,tcp,udp,verbs', when='+ares') diff --git a/docker/deps.Dockerfile b/docker/deps.Dockerfile new file mode 100644 index 000000000..a1b458297 --- /dev/null +++ b/docker/deps.Dockerfile @@ -0,0 +1,40 @@ +# Install ubuntu 22.04 +FROM ubuntu:22.04 +LABEL maintainer="llogan@hawk.iit.edu" +LABEL version="0.0" +LABEL description="Hermes Docker image with CI" + +# Disable Prompt During Packages Installation +ARG DEBIAN_FRONTEND=noninteractive + +# Update ubuntu +RUN apt update && apt install + +# Install some basic packages +RUN apt install -y \ + openssh-server \ + sudo \ + git \ + gcc g++ gfortran make binutils gpg \ + tar zip xz-utils 7zip bzip2 \ + perl m4 libncurses5-dev libxml2-dev diffutils \ + pkg-config cmake pkg-config \ + python3 python3-pip doxygen + +ENV INSTALL_DIR="${HOME}" +ENV SPACK_DIR="${INSTALL_DIR}/spack" +ENV SPACK_VERSION="v0.20.2" +ENV HERMES_DEPS_DIR="${HOME}/hermes_deps" + +# Install Spack +RUN git clone -b ${SPACK_VERSION} https://github.com/spack/spack ${SPACK_DIR} && \ + . ${SPACK_DIR}/share/spack/setup-env.sh && \ + git clone -b dev https://github.com/HDFGroup/hermes.git ${HERMES_DEPS_DIR} && \ + spack repo add ${HERMES_DEPS_DIR}/ci/hermes && \ + spack external find #&& \ + # spack install hermes_shm@master+vfd+mpiio^mpich@3.3.2 + +# Install jarvis-cd +RUN git clone https://github.com/grc-iit/jarvis-cd.git && \ + pushd jarvis-cd && \ + pip install -e . -r requirements.txt diff --git a/tasks/bdev/include/bdev/bdev_tasks.h b/tasks/bdev/include/bdev/bdev_tasks.h index 4333ba3ad..34ebc841a 100644 --- a/tasks/bdev/include/bdev/bdev_tasks.h +++ b/tasks/bdev/include/bdev/bdev_tasks.h @@ -182,13 +182,19 @@ struct WriteTask : public Task, TaskFlags { size_t disk_off, size_t size) : Task(alloc) { // Initialize task + static int counter = 0; task_node_ = task_node; - lane_hash_ = disk_off; - prio_ = TaskPrio::kHighLatency; + lane_hash_ = ++counter; + if (size < KILOBYTES(8)) { + prio_ = TaskPrio::kLowLatency; + } else { + prio_ = TaskPrio::kHighLatency; + } task_state_ = state_id; method_ = Method::kWrite; task_flags_.SetBits(TASK_UNORDERED | TASK_REMOTE_DEBUG_MARK); domain_id_ = domain_id; + counter += 1; // Free params buf_ = buf; @@ -226,9 +232,11 @@ struct ReadTask : public Task, TaskFlags { char *buf, size_t disk_off, size_t size) : Task(alloc) { + static int counter = 0; // Initialize task task_node_ = task_node; - lane_hash_ = disk_off; + lane_hash_ = counter; + ++counter; if (size < KILOBYTES(8)) { prio_ = TaskPrio::kLowLatency; } else { diff --git a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc index 2f156f265..8d4632a96 100644 --- a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc +++ b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc @@ -139,7 +139,7 @@ class Server : public TaskLib { bkt_mdm_.Init(task->bkt_mdm_); stager_mdm_.Init(task->stager_mdm_); op_mdm_.Init(task->op_mdm_); - flush_task_ = blob_mdm_.AsyncFlushData(task->task_node_ + 1); + // flush_task_ = blob_mdm_.AsyncFlushData(task->task_node_ + 1); } task->SetModuleComplete(); } From e0d6059dcd5cf9cc53fd3caafcdbcf8361b9d03a Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Tue, 19 Dec 2023 04:42:47 -0600 Subject: [PATCH 07/19] Initial dockerfile + dockerhub pull created --- .github/workflows/main.yml | 2 +- README.md | 4 +- ci/build_hermes.sh | 24 ++------ ci/install_deps.sh | 39 +++---------- docker/deps.Dockerfile | 15 +++-- docker/packages.yaml | 61 ++++++++++++++++++++ tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc | 4 +- 7 files changed, 90 insertions(+), 59 deletions(-) create mode 100644 docker/packages.yaml diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 11fb9bef9..1cabaaa77 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -73,7 +73,7 @@ jobs: - name: Build and Test # if: steps.hermes-cache.outputs.cache-hit != 'true' - run: ci/build_hermes.sh + run: docker exec /hermes_deps_c hermes/ci/build_hermes.sh # - name: Test # run: bash ci/test_hermes.sh diff --git a/README.md b/README.md index 727eb9e93..0ad6b4afe 100644 --- a/README.md +++ b/README.md @@ -57,6 +57,6 @@ We follow the [Google C++ Style Guide](https://google.github.io/styleguide/cppgu ## Docker ``` -sudo docker build -t hermes_deps ${HOME}/Documents/Projects/PhD/hermes -f docker/deps.Dockerfile -sudo docker run -it --name hermes_deps_c --network host hermes_deps +docker build -t hermes_deps ${HOME}/Documents/Projects/PhD/hermes -f docker/deps.Dockerfile +docker run -it --mount src=${PWD},target=/hermes,type=bind --name hermes_deps_c --network host hermes_deps ``` \ No newline at end of file diff --git a/ci/build_hermes.sh b/ci/build_hermes.sh index 5d123f842..391d96401 100755 --- a/ci/build_hermes.sh +++ b/ci/build_hermes.sh @@ -1,6 +1,7 @@ #!/bin/bash -# CD into git workspace +# THIS SCRIPT IS EXECUTED BY CONTAINER!!! +# CD into Hermes directory in container cd ${GITHUB_WORKSPACE} git submodule update --init @@ -8,28 +9,14 @@ set -x set -e set -o pipefail -# Download from dockerhub -docker pull lukemartinlogan/hermes_deps:latest -docker run lukemartinlogan/hermes_deps:latest - -# Set spack env -INSTALL_DIR="${HOME}" -SPACK_DIR=${INSTALL_DIR}/spack +# Load hermes_shm . ${SPACK_DIR}/share/spack/setup-env.sh +spack load hermes_shm -# Load hermes_shm +# Build Hermes mkdir -p "${HOME}/install" mkdir build cd build -spack load hermes_shm - -# Install jarvis-cd -git clone https://github.com/grc-iit/jarvis-cd.git -pushd jarvis-cd -pip install -e . -r requirements.txt -popd - -# Build Hermes cmake ../ \ -DCMAKE_BUILD_TYPE=Debug \ -DCMAKE_INSTALL_PREFIX="${HOME}/install" \ @@ -41,6 +28,7 @@ cmake ../ \ make -j8 make install +# Test Hermes export CXXFLAGS=-Wall ctest -VV diff --git a/ci/install_deps.sh b/ci/install_deps.sh index 0fac0a6f7..4c45e8116 100755 --- a/ci/install_deps.sh +++ b/ci/install_deps.sh @@ -3,39 +3,14 @@ # CD into git workspace cd ${GITHUB_WORKSPACE} -# This script will build and install them via Spack from source -# because Hermes requires a very specific version and configuration options -# for each package. - set -x set -e set -o pipefail -# Change this especially when your $HOME doesn't have enough disk space. -INSTALL_DIR="${HOME}" -SPACK_DIR=${INSTALL_DIR}/spack -SPACK_VERSION=0.20.2 - -echo "Installing dependencies at ${INSTALL_DIR}" -mkdir -p ${INSTALL_DIR} - -# Load Spack -git clone https://github.com/spack/spack ${SPACK_DIR} -cd ${SPACK_DIR} -git checkout v${SPACK_VERSION} - -# Set spack env -set +x -. ${SPACK_DIR}/share/spack/setup-env.sh -set -x - -# This will allow Spack to skip building some packages that are directly -# available from the system. For example, autoconf, cmake, m4, etc. -# Modify ci/pckages.yaml to skip building compilers or build tools via Spack. -cd ${GITHUB_WORKSPACE} -cp ci/packages.yaml ${SPACK_DIR}/etc/spack/packages.yaml - -# Install hermes_shm (needed for dependencies) -# -spack repo add ci/hermes -spack install hermes_shm@master+vfd+mpiio^mpich@3.3.2 +# Pull the Hermes dependencies image +docker pull lukemartinlogan/hermes_deps:latest +docker run \ +--mount src=${PWD},target=/hermes,type=bind \ +--name hermes_deps_c \ +--network host \ +lukemartinlogan/hermes_deps diff --git a/docker/deps.Dockerfile b/docker/deps.Dockerfile index a1b458297..0719682de 100644 --- a/docker/deps.Dockerfile +++ b/docker/deps.Dockerfile @@ -25,16 +25,23 @@ ENV INSTALL_DIR="${HOME}" ENV SPACK_DIR="${INSTALL_DIR}/spack" ENV SPACK_VERSION="v0.20.2" ENV HERMES_DEPS_DIR="${HOME}/hermes_deps" +ENV HERMES_DIR="${HOME}/hermes" # Install Spack RUN git clone -b ${SPACK_VERSION} https://github.com/spack/spack ${SPACK_DIR} && \ . ${SPACK_DIR}/share/spack/setup-env.sh && \ - git clone -b dev https://github.com/HDFGroup/hermes.git ${HERMES_DEPS_DIR} && \ + git clone -b dev https://github.com/lukemartinlogan/hermes.git ${HERMES_DEPS_DIR} && \ + # git clone -b dev https://github.com/HDFGroup/hermes.git ${HERMES_DEPS_DIR} && \ spack repo add ${HERMES_DEPS_DIR}/ci/hermes && \ - spack external find #&& \ - # spack install hermes_shm@master+vfd+mpiio^mpich@3.3.2 + mkdir -p ${HERMES_DIR} && \ + spack external find + +# COPY docker/packages.yaml ~/.spack/packages.yaml + +RUN . ${SPACK_DIR}/share/spack/setup-env.sh && \ + spack install hermes_shm@master+vfd+mpiio^mpich@3.3.2 # Install jarvis-cd RUN git clone https://github.com/grc-iit/jarvis-cd.git && \ - pushd jarvis-cd && \ + cd jarvis-cd && \ pip install -e . -r requirements.txt diff --git a/docker/packages.yaml b/docker/packages.yaml new file mode 100644 index 000000000..ce7c9526e --- /dev/null +++ b/docker/packages.yaml @@ -0,0 +1,61 @@ +packages: + perl: + externals: + - spec: perl@5.34.0~cpanm+open+shared+threads + prefix: /usr + cmake: + externals: + - spec: cmake@3.22.1 + prefix: /usr + m4: + externals: + - spec: m4@1.4.18 + prefix: /usr + binutils: + externals: + - spec: binutils@2.38 + prefix: /usr + pkg-config: + externals: + - spec: pkg-config@0.29.2 + prefix: /usr + findutils: + externals: + - spec: findutils@4.8.0 + prefix: /usr + coreutils: + externals: + - spec: coreutils@8.32 + prefix: /usr + gmake: + externals: + - spec: gmake@4.3 + prefix: /usr + openssl: + externals: + - spec: openssl@3.0.2 + prefix: /usr + git: + externals: + - spec: git@2.34.1~tcltk + prefix: /usr + doxygen: + externals: + - spec: doxygen@1.9.1~graphviz~mscgen + prefix: /usr + openssh: + externals: + - spec: openssh@8.9p1 + prefix: /usr + tar: + externals: + - spec: tar@1.34 + prefix: /usr + diffutils: + externals: + - spec: diffutils@3.8 + prefix: /usr + bzip2: + externals: + - spec: bzip2@1.0.8 + prefix: /usr diff --git a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc index 8d4632a96..ceac09b83 100644 --- a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc +++ b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc @@ -343,8 +343,8 @@ class Server : public TaskLib { task->blob_id_ = GetOrCreateBlobId(task->tag_id_, task->lane_hash_, blob_name, rctx, task->flags_); } - HILOG(kDebug, "Beginning PUT for (hash: {}) {}", - std::hash{}(blob_name), blob_name.str()); + HILOG(kDebug, "Beginning PUT for (hash: {})", + std::hash{}(blob_name)); BLOB_MAP_T &blob_map = blob_map_[rctx.lane_id_]; BlobInfo &blob_info = blob_map[task->blob_id_]; blob_info.score_ = task->score_; From 6aa00903d298e0bf4fd951ace8573ab4930cdb48 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Tue, 19 Dec 2023 06:49:12 -0600 Subject: [PATCH 08/19] Add resource graph --- .github/workflows/main.yml | 18 +++----- README.md | 5 ++- ci/build_hermes.sh | 63 ++++++++++++++++++++-------- ci/install_deps.sh | 8 ++-- ci/packages.yaml | 38 ----------------- ci/resource_graph.yaml | 73 +++++++++++++++++++++++++++++++++ docker/deps.Dockerfile | 12 ++++-- docker/packages.yaml | 61 --------------------------- test/unit/hermes/CMakeLists.txt | 2 +- 9 files changed, 140 insertions(+), 140 deletions(-) delete mode 100644 ci/packages.yaml create mode 100644 ci/resource_graph.yaml delete mode 100644 docker/packages.yaml diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 1cabaaa77..00f150403 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -57,22 +57,14 @@ jobs: - name: Install APT Dependencies run: | sudo apt update - sudo apt-get install -y autoconf - sudo apt-get install -y automake - sudo apt-get install -y libtool - sudo apt-get install -y libtool-bin - sudo apt-get install -y mpich - sudo apt-get install -y lcov - sudo apt-get install -y zlib1g-dev - sudo apt-get install -y libsdl2-dev - sudo apt-get install -y hdf5-tools + sudo apt-get install -y docker - name: Build And Install Dependencies - if: steps.spack-cache.outputs.cache-hit != 'true' +# if: steps.spack-cache.outputs.cache-hit != 'true' run: ci/install_deps.sh - name: Build and Test - # if: steps.hermes-cache.outputs.cache-hit != 'true' +# if: steps.hermes-cache.outputs.cache-hit != 'true' run: docker exec /hermes_deps_c hermes/ci/build_hermes.sh # - name: Test @@ -86,8 +78,8 @@ jobs: # - name: Multi-node Test # run: pushd ci/cluster && ./multi_node_ci_test.sh -# - name: Generate coverage file -# run: bash ci/coverage.sh "${GITHUB_WORKSPACE}/coverage" "${GITHUB_WORKSPACE}/build" + - name: Generate coverage file + run: docker exec /hermes_deps_c hermes/ci/coverage.sh "hermes/coverage" "hermes/build" # - name: Coveralls # uses: coverallsapp/github-action@master diff --git a/README.md b/README.md index 0ad6b4afe..ff0fafd37 100644 --- a/README.md +++ b/README.md @@ -57,6 +57,7 @@ We follow the [Google C++ Style Guide](https://google.github.io/styleguide/cppgu ## Docker ``` -docker build -t hermes_deps ${HOME}/Documents/Projects/PhD/hermes -f docker/deps.Dockerfile -docker run -it --mount src=${PWD},target=/hermes,type=bind --name hermes_deps_c --network host hermes_deps +docker build -t lukemartinlogan/hermes_deps . -f docker/deps.Dockerfile +docker run -it --mount src=${PWD},target=/hermes,type=bind --name hermes_deps_c --network host lukemartinlogan/hermes_deps +docker push lukemartinlogan/hermes_deps ``` \ No newline at end of file diff --git a/ci/build_hermes.sh b/ci/build_hermes.sh index 391d96401..ac2bfa566 100755 --- a/ci/build_hermes.sh +++ b/ci/build_hermes.sh @@ -1,25 +1,56 @@ #!/bin/bash # THIS SCRIPT IS EXECUTED BY CONTAINER!!! -# CD into Hermes directory in container -cd ${GITHUB_WORKSPACE} -git submodule update --init - set -x set -e set -o pipefail +# Update jarvis-cd +pushd jarvis-cd +git pull +pip install -e . -r requirements.txt +popd + +# Update scspkg +pushd scspkg +git pull +pip install -e . -r requirements.txt +popd + +# Load scspkg environment +if ! shopt -q login_shell; then + if [ -d /etc/profile.d ]; then + for i in /etc/profile.d/*.sh; do + if [ -r $i ]; then + . $i + fi + done + fi +fi +module use "$(scspkg module dir)" + +# CD into Hermes directory in container +git config --global --add safe.directory '*' +cd hermes +git submodule update --init + # Load hermes_shm . ${SPACK_DIR}/share/spack/setup-env.sh spack load hermes_shm +# Create Hermes module +scspkg create hermes +scspkg env prepend hermes PATH /hermes/build +scspkg env prepend hermes LIBRARY_PATH /hermes/build +scspkg env prepend hermes LD_LIBRARY_PATH /hermes/build +module load hermes + # Build Hermes -mkdir -p "${HOME}/install" -mkdir build +mkdir -p build cd build cmake ../ \ -DCMAKE_BUILD_TYPE=Debug \ --DCMAKE_INSTALL_PREFIX="${HOME}/install" \ +-DCMAKE_INSTALL_PREFIX="$(scspkg pkg root hermes)" \ -DHERMES_ENABLE_MPIIO_ADAPTER=ON \ -DHERMES_MPICH=ON \ -DHERMES_ENABLE_STDIO_ADAPTER=ON \ @@ -28,20 +59,18 @@ cmake ../ \ make -j8 make install +# Initialize the Jarvis testing Hermes environment +jarvis init \ +"${HOME}/jarvis-config" \ +"${HOME}/jarvis-priv" \ +"${HOME}/jarvis-shared" +cp /hermes/ci/resource_graph.yaml /jarvis-cd/config/resource_graph.yaml +jarvis env build hermes + # Test Hermes export CXXFLAGS=-Wall ctest -VV -# Set proper flags for cmake to find Hermes -INSTALL_PREFIX="${HOME}/install" -export LIBRARY_PATH="${INSTALL_PREFIX}/lib:${LIBRARY_PATH}" -export LD_LIBRARY_PATH="${INSTALL_PREFIX}/lib:${LD_LIBRARY_PATH}" -export LDFLAGS="-L${INSTALL_PREFIX}/lib:${LDFLAGS}" -export CFLAGS="-I${INSTALL_PREFIX}/include:${CFLAGS}" -export CPATH="${INSTALL_PREFIX}/include:${CPATH}" -export CMAKE_PREFIX_PATH="${INSTALL_PREFIX}:${CMAKE_PREFIX_PATH}" -export CXXFLAGS="-I${INSTALL_PREFIX}/include:${CXXFLAGS}" - # Run make install unit test cd test/unit/external mkdir build diff --git a/ci/install_deps.sh b/ci/install_deps.sh index 4c45e8116..523280275 100755 --- a/ci/install_deps.sh +++ b/ci/install_deps.sh @@ -1,16 +1,14 @@ #!/bin/bash -# CD into git workspace -cd ${GITHUB_WORKSPACE} - set -x set -e set -o pipefail # Pull the Hermes dependencies image docker pull lukemartinlogan/hermes_deps:latest -docker run \ +docker run -d \ --mount src=${PWD},target=/hermes,type=bind \ --name hermes_deps_c \ --network host \ -lukemartinlogan/hermes_deps +lukemartinlogan/hermes_deps \ +tail -f /dev/null diff --git a/ci/packages.yaml b/ci/packages.yaml deleted file mode 100644 index 7be206aad..000000000 --- a/ci/packages.yaml +++ /dev/null @@ -1,38 +0,0 @@ -packages: - all: - target: [x86_64] - mpich: - externals: - - spec: mpich@3.3.2 - prefix: /usr - buildable: False - cmake: - externals: - - spec: cmake@3.23.2 - prefix: /usr/local - buildable: False - autoconf: - externals: - - spec: autoconf@2.69 - prefix: /usr - buildable: False - automake: - externals: - - spec: automake@1.16 - prefix: /usr - buildable: False - libtool: - externals: - - spec: libtool@2.4.6 - prefix: /usr - buildable: False - m4: - externals: - - spec: m4@1.4.18 - prefix: /usr - buildable: False - pkg-config: - externals: - - spec: pkg-config@0.29.1 - prefix: /usr - buildable: False diff --git a/ci/resource_graph.yaml b/ci/resource_graph.yaml new file mode 100644 index 000000000..8573ae7e1 --- /dev/null +++ b/ci/resource_graph.yaml @@ -0,0 +1,73 @@ +fs: +- avail: 17179869184 + dev_type: hdd + device: /dev/hdd1 + fs_mount: /hdd + fs_size: 16G + fs_type: ext4 + host: localhost + label: null + model: CT500P1SSD8 + mount: /hdd + parent: /dev/hdd + partlabel: null + partuuid: d4ff55be-ac9f-4bd3-9602-48e8b673fa37 + rota: false + shared: false + size: 17179869184 + tran: nvme + use%: 0% + used: 0G + uuid: 8debe05d-9177-40b5-8bb0-82fc969ce919 +- avail: 17179869184 + dev_type: ssd + device: /dev/ssd1 + fs_mount: /ssd + fs_size: 16G + fs_type: ext4 + host: localhost + label: null + model: CT500P1SSD8 + mount: /ssd + parent: /dev/ssd + partlabel: null + partuuid: d4ff55be-ac9f-4bd3-9602-48e8b673fa37 + rota: false + shared: false + size: 17179869184 + tran: nvme + use%: 0% + used: 0G + uuid: 8debe05d-9177-40b5-8bb0-82fc969ce919 +- avail: 17179869184 + dev_type: nvme + device: /dev/nvme1n1p3 + fs_mount: /nvme + fs_size: 16G + fs_type: ext4 + host: localhost + label: null + model: CT500P1SSD8 + mount: /nvme + parent: /dev/nvme1n1 + partlabel: null + partuuid: d4ff55be-ac9f-4bd3-9602-48e8b673fa37 + rota: false + shared: false + size: 17179869184 + tran: nvme + use%: 0% + used: 0G + uuid: 8debe05d-9177-40b5-8bb0-82fc969ce919 +hosts: +- localhost +net: +- domain: lo + fabric: 127.0.0.0/8 + host: localhost + protocol: FI_PROTO_SOCK_TCP + provider: sockets + shared: false + speed: 1073741824 + type: FI_EP_RDM + version: '2.0' diff --git a/docker/deps.Dockerfile b/docker/deps.Dockerfile index 0719682de..4e6e8e72e 100644 --- a/docker/deps.Dockerfile +++ b/docker/deps.Dockerfile @@ -19,7 +19,9 @@ RUN apt install -y \ tar zip xz-utils 7zip bzip2 \ perl m4 libncurses5-dev libxml2-dev diffutils \ pkg-config cmake pkg-config \ - python3 python3-pip doxygen + python3 python3-pip doxygen \ + lcov zlib1g-dev hdf5-tools \ + lmod ENV INSTALL_DIR="${HOME}" ENV SPACK_DIR="${INSTALL_DIR}/spack" @@ -36,8 +38,7 @@ RUN git clone -b ${SPACK_VERSION} https://github.com/spack/spack ${SPACK_DIR} && mkdir -p ${HERMES_DIR} && \ spack external find -# COPY docker/packages.yaml ~/.spack/packages.yaml - +# Install hermes_shm RUN . ${SPACK_DIR}/share/spack/setup-env.sh && \ spack install hermes_shm@master+vfd+mpiio^mpich@3.3.2 @@ -45,3 +46,8 @@ RUN . ${SPACK_DIR}/share/spack/setup-env.sh && \ RUN git clone https://github.com/grc-iit/jarvis-cd.git && \ cd jarvis-cd && \ pip install -e . -r requirements.txt + +# Install scspkg +RUN git clone https://github.com/grc-iit/scspkg.git && \ + cd scspkg && \ + pip install -e . -r requirements.txt diff --git a/docker/packages.yaml b/docker/packages.yaml deleted file mode 100644 index ce7c9526e..000000000 --- a/docker/packages.yaml +++ /dev/null @@ -1,61 +0,0 @@ -packages: - perl: - externals: - - spec: perl@5.34.0~cpanm+open+shared+threads - prefix: /usr - cmake: - externals: - - spec: cmake@3.22.1 - prefix: /usr - m4: - externals: - - spec: m4@1.4.18 - prefix: /usr - binutils: - externals: - - spec: binutils@2.38 - prefix: /usr - pkg-config: - externals: - - spec: pkg-config@0.29.2 - prefix: /usr - findutils: - externals: - - spec: findutils@4.8.0 - prefix: /usr - coreutils: - externals: - - spec: coreutils@8.32 - prefix: /usr - gmake: - externals: - - spec: gmake@4.3 - prefix: /usr - openssl: - externals: - - spec: openssl@3.0.2 - prefix: /usr - git: - externals: - - spec: git@2.34.1~tcltk - prefix: /usr - doxygen: - externals: - - spec: doxygen@1.9.1~graphviz~mscgen - prefix: /usr - openssh: - externals: - - spec: openssh@8.9p1 - prefix: /usr - tar: - externals: - - spec: tar@1.34 - prefix: /usr - diffutils: - externals: - - spec: diffutils@3.8 - prefix: /usr - bzip2: - externals: - - spec: bzip2@1.0.8 - prefix: /usr diff --git a/test/unit/hermes/CMakeLists.txt b/test/unit/hermes/CMakeLists.txt index e74e0d63f..0e16a098f 100644 --- a/test/unit/hermes/CMakeLists.txt +++ b/test/unit/hermes/CMakeLists.txt @@ -16,7 +16,7 @@ add_dependencies(test_hermes_exec ${Hermes_CLIENT_DEPS} hermes) target_link_libraries(test_hermes_exec ${Hermes_CLIENT_LIBRARIES} hermes Catch2::Catch2 MPI::MPI_CXX) -jarvis_test(test_hermes test_hermes) +jarvis_test(hermes test_hermes) #------------------------------------------------------------------------------ # Test Cases From 24deb2da915c0d9220a619c840c7471f149a33bc Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Wed, 20 Dec 2023 00:57:50 -0600 Subject: [PATCH 09/19] Re-enable flushing. Change external compile. --- CMakeLists.txt | 2 + README.md | 24 ++++++++- ci/build_hermes.sh | 51 +++++++++----------- ci/module_load.sh | 10 ++++ docker/deps.Dockerfile | 30 ++++++++---- tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc | 2 +- test/unit/external/CMakeLists.txt | 5 +- test/unit/external/external.cc | 44 +++++++++++++++++ 8 files changed, 125 insertions(+), 43 deletions(-) create mode 100644 ci/module_load.sh create mode 100644 test/unit/external/external.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index 0ab8153ce..d2a0c09b5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -74,6 +74,8 @@ add_custom_target(coverage COMMAND bash ${CMAKE_SOURCE_DIR}/ci/coverage.sh #----------------------------------------------------------------------------- # Find Packages #----------------------------------------------------------------------------- +# This is for compatability with SPACK +SET(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) # HermesShm find_package(HermesShm CONFIG REQUIRED) diff --git a/README.md b/README.md index ff0fafd37..bfea6a49b 100644 --- a/README.md +++ b/README.md @@ -56,8 +56,30 @@ We follow the [Google C++ Style Guide](https://google.github.io/styleguide/cppgu ## Docker +Build container with Hermes dependencies: ``` docker build -t lukemartinlogan/hermes_deps . -f docker/deps.Dockerfile -docker run -it --mount src=${PWD},target=/hermes,type=bind --name hermes_deps_c --network host lukemartinlogan/hermes_deps +``` + +Run the container with the Hermes source mounted: +``` +docker run -it --mount src=${PWD},target=/hermes,type=bind \ +--name hermes_deps_c \ +--network host \ +--memory=4G \ +--shm-size=4G \ +-p 4000:4000 \ +-p 4001:4001 \ +lukemartinlogan/hermes_deps +``` + +Build Hermes + Jarvis (in container): +``` +bash /hermes/ci/build_hermes.sh +``` + +``` docker push lukemartinlogan/hermes_deps +docker stop /hermes_deps_c +docker rm /hermes_deps_c ``` \ No newline at end of file diff --git a/ci/build_hermes.sh b/ci/build_hermes.sh index ac2bfa566..4557b6e84 100755 --- a/ci/build_hermes.sh +++ b/ci/build_hermes.sh @@ -1,5 +1,8 @@ #!/bin/bash +# ARGS: +# SPACK_DIR: the path to spack + # THIS SCRIPT IS EXECUTED BY CONTAINER!!! set -x set -e @@ -18,33 +21,35 @@ pip install -e . -r requirements.txt popd # Load scspkg environment -if ! shopt -q login_shell; then - if [ -d /etc/profile.d ]; then - for i in /etc/profile.d/*.sh; do - if [ -r $i ]; then - . $i - fi - done - fi -fi +. /module_load.sh module use "$(scspkg module dir)" -# CD into Hermes directory in container -git config --global --add safe.directory '*' -cd hermes -git submodule update --init - # Load hermes_shm -. ${SPACK_DIR}/share/spack/setup-env.sh +. "${SPACK_DIR}/share/spack/setup-env.sh" +spack module tcl refresh --delete-tree -y spack load hermes_shm +# module use "${SPACK_DIR}/share/spack/modules/linux-ubuntu22.04-zen2" # Create Hermes module scspkg create hermes -scspkg env prepend hermes PATH /hermes/build -scspkg env prepend hermes LIBRARY_PATH /hermes/build -scspkg env prepend hermes LD_LIBRARY_PATH /hermes/build +scspkg env prepend hermes PATH /hermes/build/bin +scspkg env prepend hermes LIBRARY_PATH /hermes/build/bin +scspkg env prepend hermes LD_LIBRARY_PATH /hermes/build/bin module load hermes +# Initialize the Jarvis testing Hermes environment +jarvis init \ +"${HOME}/jarvis-config" \ +"${HOME}/jarvis-priv" \ +"${HOME}/jarvis-shared" +cp /hermes/ci/resource_graph.yaml /jarvis-cd/config/resource_graph.yaml +jarvis env build hermes + +# CD into Hermes directory in container +cd /hermes +git config --global --add safe.directory '*' +git submodule update --init + # Build Hermes mkdir -p build cd build @@ -59,20 +64,12 @@ cmake ../ \ make -j8 make install -# Initialize the Jarvis testing Hermes environment -jarvis init \ -"${HOME}/jarvis-config" \ -"${HOME}/jarvis-priv" \ -"${HOME}/jarvis-shared" -cp /hermes/ci/resource_graph.yaml /jarvis-cd/config/resource_graph.yaml -jarvis env build hermes - # Test Hermes export CXXFLAGS=-Wall ctest -VV # Run make install unit test -cd test/unit/external +cd /hermes/test/unit/external mkdir build cd build cmake ../ diff --git a/ci/module_load.sh b/ci/module_load.sh new file mode 100644 index 000000000..46706ff02 --- /dev/null +++ b/ci/module_load.sh @@ -0,0 +1,10 @@ +#!/bin/bash +if ! shopt -q login_shell; then + if [ -d /etc/profile.d ]; then + for i in /etc/profile.d/*.sh; do + if [ -r $i ]; then + . $i + fi + done + fi +fi diff --git a/docker/deps.Dockerfile b/docker/deps.Dockerfile index 4e6e8e72e..e55eb9ca9 100644 --- a/docker/deps.Dockerfile +++ b/docker/deps.Dockerfile @@ -1,5 +1,5 @@ -# Install ubuntu 22.04 -FROM ubuntu:22.04 +# Install ubuntu 20.04 +FROM ubuntu:20.04 LABEL maintainer="llogan@hawk.iit.edu" LABEL version="0.0" LABEL description="Hermes Docker image with CI" @@ -8,6 +8,7 @@ LABEL description="Hermes Docker image with CI" ARG DEBIAN_FRONTEND=noninteractive # Update ubuntu +SHELL ["/bin/bash", "-c"] RUN apt update && apt install # Install some basic packages @@ -16,22 +17,30 @@ RUN apt install -y \ sudo \ git \ gcc g++ gfortran make binutils gpg \ - tar zip xz-utils 7zip bzip2 \ + tar zip xz-utils bzip2 \ perl m4 libncurses5-dev libxml2-dev diffutils \ pkg-config cmake pkg-config \ python3 python3-pip doxygen \ lcov zlib1g-dev hdf5-tools \ - lmod - -ENV INSTALL_DIR="${HOME}" -ENV SPACK_DIR="${INSTALL_DIR}/spack" + build-essential ca-certificates \ + coreutils curl environment-modules \ + gfortran git gpg lsb-release python3 python3-distutils \ + python3-venv unzip zip \ + bash jq python gdbserver gdb + +# Setup basic environment +ENV USER="root" +ENV HOME="/root" +ENV SPACK_DIR="${HOME}/spack" ENV SPACK_VERSION="v0.20.2" ENV HERMES_DEPS_DIR="${HOME}/hermes_deps" ENV HERMES_DIR="${HOME}/hermes" +COPY ci/module_load.sh /module_load.sh # Install Spack -RUN git clone -b ${SPACK_VERSION} https://github.com/spack/spack ${SPACK_DIR} && \ - . ${SPACK_DIR}/share/spack/setup-env.sh && \ +RUN . /module_load.sh && \ + git clone -b ${SPACK_VERSION} https://github.com/spack/spack ${SPACK_DIR} && \ + . "${SPACK_DIR}/share/spack/setup-env.sh" && \ git clone -b dev https://github.com/lukemartinlogan/hermes.git ${HERMES_DEPS_DIR} && \ # git clone -b dev https://github.com/HDFGroup/hermes.git ${HERMES_DEPS_DIR} && \ spack repo add ${HERMES_DEPS_DIR}/ci/hermes && \ @@ -39,7 +48,8 @@ RUN git clone -b ${SPACK_VERSION} https://github.com/spack/spack ${SPACK_DIR} && spack external find # Install hermes_shm -RUN . ${SPACK_DIR}/share/spack/setup-env.sh && \ +RUN . /module_load.sh && \ + . "${SPACK_DIR}/share/spack/setup-env.sh" && \ spack install hermes_shm@master+vfd+mpiio^mpich@3.3.2 # Install jarvis-cd diff --git a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc index ceac09b83..56217f89b 100644 --- a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc +++ b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc @@ -139,7 +139,7 @@ class Server : public TaskLib { bkt_mdm_.Init(task->bkt_mdm_); stager_mdm_.Init(task->stager_mdm_); op_mdm_.Init(task->op_mdm_); - // flush_task_ = blob_mdm_.AsyncFlushData(task->task_node_ + 1); + flush_task_ = blob_mdm_.AsyncFlushData(task->task_node_ + 1); } task->SetModuleComplete(); } diff --git a/test/unit/external/CMakeLists.txt b/test/unit/external/CMakeLists.txt index 5e9284ba1..a9fb9a944 100644 --- a/test/unit/external/CMakeLists.txt +++ b/test/unit/external/CMakeLists.txt @@ -1,15 +1,12 @@ cmake_minimum_required(VERSION 3.10) project(hermes) -include_directories(/home/lukemartinlogan/Documents/Projects/PhD/hermes/test/unit) set(CMAKE_CXX_STANDARD 17) find_package(Hermes REQUIRED) find_package(MPI REQUIRED COMPONENTS C CXX) include_directories(${Hermes_INCLUDE_DIRS}) add_executable(test_hermes_external_compile - ../../main_mpi.cc - ../hermes/test_init.cc - ../hermes/test_bucket.cc + external.cc ) target_link_libraries(test_hermes_external_compile ${Hermes_LIBRARIES} Catch2::Catch2 MPI::MPI_CXX) diff --git a/test/unit/external/external.cc b/test/unit/external/external.cc new file mode 100644 index 000000000..a39f5acdc --- /dev/null +++ b/test/unit/external/external.cc @@ -0,0 +1,44 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * Distributed under BSD 3-Clause license. * + * Copyright by The HDF Group. * + * Copyright by the Illinois Institute of Technology. * + * All rights reserved. * + * * + * This file is part of Hermes. The full Hermes copyright notice, including * + * terms governing use, modification, and redistribution, is contained in * + * the COPYING file, which can be found at the top directory. If you do not * + * have access to the file, you may request a copy from help@hdfgroup.org. * + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +#define CATCH_CONFIG_RUNNER +#include +#include +#include +#include +#include "hermes/hermes.h" + +namespace cl = Catch::Clara; +cl::Parser define_options(); + +int main(int argc, char **argv) { + int rc; + MPI_Init(&argc, &argv); + Catch::Session session; + auto cli = session.cli(); + session.cli(cli); + rc = session.applyCommandLine(argc, argv); + if (rc != 0) return rc; + rc = session.run(); + if (rc != 0) return rc; + MPI_Finalize(); + return rc; +} + +TEST_CASE("TestHermesConnect") { + int rank, nprocs; + MPI_Barrier(MPI_COMM_WORLD); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &nprocs); + HERMES->ClientInit(); + MPI_Barrier(MPI_COMM_WORLD); +} \ No newline at end of file From 2b94b6449137979d6dd71b143d6456df49f3dd9a Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Wed, 20 Dec 2023 01:00:14 -0600 Subject: [PATCH 10/19] Update dep install --- ci/install_deps.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ci/install_deps.sh b/ci/install_deps.sh index 523280275..764bfc0a5 100755 --- a/ci/install_deps.sh +++ b/ci/install_deps.sh @@ -10,5 +10,9 @@ docker run -d \ --mount src=${PWD},target=/hermes,type=bind \ --name hermes_deps_c \ --network host \ +--memory=4G \ +--shm-size=4G \ +-p 4000:4000 \ +-p 4001:4001 \ lukemartinlogan/hermes_deps \ tail -f /dev/null From faa8ee50f3e773728ef6757b5c5e7979499c92a9 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Wed, 20 Dec 2023 01:05:55 -0600 Subject: [PATCH 11/19] Remove specific doxy version --- README.md | 1 + ci/hermes/packages/hermes/package.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index bfea6a49b..23af1aa42 100644 --- a/README.md +++ b/README.md @@ -79,6 +79,7 @@ bash /hermes/ci/build_hermes.sh ``` ``` +docker commit hermes_deps_c lukemartinlogan/hermes_deps docker push lukemartinlogan/hermes_deps docker stop /hermes_deps_c docker rm /hermes_deps_c diff --git a/ci/hermes/packages/hermes/package.py b/ci/hermes/packages/hermes/package.py index aa41fe218..a21ec63b6 100644 --- a/ci/hermes/packages/hermes/package.py +++ b/ci/hermes/packages/hermes/package.py @@ -45,7 +45,7 @@ class Hermes(CMakePackage): depends_on('cereal') depends_on('yaml-cpp') depends_on('libaio') - depends_on('doxygen@1.9.3') + depends_on('doxygen') # @1.9.3 depends_on('boost@1.7: +context +fiber +filesystem +system +atomic +chrono +serialization +signals +pic +regex') depends_on('libfabric fabrics=sockets,tcp,udp,verbs', when='+ares') @@ -53,6 +53,7 @@ class Hermes(CMakePackage): when='+only_verbs') depends_on('libzmq', '+zmq') depends_on('hdf5@1.14.0', when='+vfd') + depends_on('adios2', when='+adios') def cmake_args(self): args = [] From fd69f51e934990ad683f514ba12a5fcfbb9a7d66 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Wed, 20 Dec 2023 09:38:17 -0600 Subject: [PATCH 12/19] Fix lint issues --- test/unit/external/external.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/unit/external/external.cc b/test/unit/external/external.cc index a39f5acdc..910d0eb08 100644 --- a/test/unit/external/external.cc +++ b/test/unit/external/external.cc @@ -41,4 +41,4 @@ TEST_CASE("TestHermesConnect") { MPI_Comm_size(MPI_COMM_WORLD, &nprocs); HERMES->ClientInit(); MPI_Barrier(MPI_COMM_WORLD); -} \ No newline at end of file +} From 2c53043c3c97280e494c64b5d85c3add864e82e9 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Wed, 20 Dec 2023 10:29:09 -0600 Subject: [PATCH 13/19] Use bash for script exec --- .github/workflows/main.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 00f150403..067e33a61 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -65,7 +65,7 @@ jobs: - name: Build and Test # if: steps.hermes-cache.outputs.cache-hit != 'true' - run: docker exec /hermes_deps_c hermes/ci/build_hermes.sh + run: docker exec /hermes_deps_c bash hermes/ci/build_hermes.sh # - name: Test # run: bash ci/test_hermes.sh @@ -79,7 +79,7 @@ jobs: # run: pushd ci/cluster && ./multi_node_ci_test.sh - name: Generate coverage file - run: docker exec /hermes_deps_c hermes/ci/coverage.sh "hermes/coverage" "hermes/build" + run: docker exec /hermes_deps_c bash hermes/ci/coverage.sh "hermes/coverage" "hermes/build" # - name: Coveralls # uses: coverallsapp/github-action@master From eee264d5da924dfbac498a6d8686a51281c8a56f Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Fri, 22 Dec 2023 08:03:12 -0600 Subject: [PATCH 14/19] Reduce the number of queues to just admin and proc for now --- hrun/include/hrun/api/hrun_client.h | 5 +- .../queue_manager/queue_manager_runtime.h | 16 +++--- hrun/include/hrun/task_registry/task.h | 10 ++-- hrun/include/hrun/task_registry/task_lib.h | 6 ++- .../TASK_NAME/include/TASK_NAME/TASK_NAME.h | 9 +--- .../hrun_admin/src/hrun_admin.cc | 8 +-- .../include/proc_queue/proc_queue.h | 10 +--- .../include/remote_queue/remote_queue.h | 11 +--- .../include/small_message/small_message.h | 8 +-- .../worch_proc_round_robin.h | 4 +- .../worch_queue_round_robin.h | 5 +- tasks/bdev/include/bdev/bdev.h | 9 +--- .../include/data_stager/data_stager.h | 8 +-- tasks/data_stager/src/data_stager.cc | 2 +- .../include/hermes_blob_mdm/hermes_blob_mdm.h | 51 ++++++++----------- .../hermes_blob_mdm/hermes_blob_mdm_tasks.h | 2 +- tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc | 8 +-- .../hermes_bucket_mdm/hermes_bucket_mdm.h | 8 +-- .../src/hermes_bucket_mdm.cc | 6 +-- .../include/hermes_data_op/hermes_data_op.h | 8 +-- tasks/hermes_data_op/src/hermes_data_op.cc | 6 +-- .../include/hermes_mdm/hermes_mdm.h | 6 +-- 22 files changed, 79 insertions(+), 127 deletions(-) diff --git a/hrun/include/hrun/api/hrun_client.h b/hrun/include/hrun/api/hrun_client.h index a9c199dbd..c892b9a2f 100644 --- a/hrun/include/hrun/api/hrun_client.h +++ b/hrun/include/hrun/api/hrun_client.h @@ -290,7 +290,10 @@ class Client : public ConfigurationManager { /** Get a queue by its ID */ HSHM_ALWAYS_INLINE MultiQueue* GetQueue(const QueueId &queue_id) { - return queue_manager_.GetQueue(queue_id); + if (queue_id == HRUN_QM_CLIENT->process_queue_) { + return queue_manager_.GetQueue(queue_id); + } + return queue_manager_.GetQueue(HRUN_QM_CLIENT->admin_queue_); } /** Detect if a task is local or remote */ diff --git a/hrun/include/hrun/queue_manager/queue_manager_runtime.h b/hrun/include/hrun/queue_manager/queue_manager_runtime.h index f950649c3..2c8e7de0f 100644 --- a/hrun/include/hrun/queue_manager/queue_manager_runtime.h +++ b/hrun/include/hrun/queue_manager/queue_manager_runtime.h @@ -55,15 +55,17 @@ class QueueManagerRuntime : public QueueManager { queue_map_->resize(max_queues_); // Create the admin queue MultiQueue *queue; - queue = CreateQueue(admin_queue_, { - {TaskPrio::kAdmin, 1, 1, qm.queue_depth_, QUEUE_UNORDERED} - }); - queue->flags_.SetBits(QUEUE_READY); - queue = CreateQueue(process_queue_, { + std::vector queue_info{ {TaskPrio::kAdmin, 1, 1, qm.queue_depth_, QUEUE_UNORDERED}, {TaskPrio::kLongRunning, 1, 1, qm.queue_depth_, QUEUE_LONG_RUNNING}, - {TaskPrio::kLowLatency, qm.max_lanes_, qm.max_lanes_, qm.queue_depth_, QUEUE_LOW_LATENCY} - }); + {TaskPrio::kLowLatency, qm.max_lanes_, qm.max_lanes_, qm.queue_depth_, QUEUE_LOW_LATENCY}, + {TaskPrio::kLongRunningTether, qm.max_lanes_, qm.max_lanes_, qm.queue_depth_, + QUEUE_LONG_RUNNING | QUEUE_TETHERED, TaskPrio::kLowLatency}, + {TaskPrio::kHighLatency, qm.max_lanes_, qm.max_lanes_, qm.queue_depth_, QUEUE_LOW_LATENCY} + }; + queue = CreateQueue(admin_queue_, queue_info); + queue->flags_.SetBits(QUEUE_READY); + queue = CreateQueue(process_queue_, queue_info); queue->flags_.SetBits(QUEUE_READY); } diff --git a/hrun/include/hrun/task_registry/task.h b/hrun/include/hrun/task_registry/task.h index 5a92f2e27..47813b083 100644 --- a/hrun/include/hrun/task_registry/task.h +++ b/hrun/include/hrun/task_registry/task.h @@ -227,13 +227,13 @@ struct TaskFlags : public IsTask { /** Prioritization of tasks */ class TaskPrio { public: - TASK_PRIO_T kAdmin = 0; - TASK_PRIO_T kLongRunning = 1; - TASK_PRIO_T kLowLatency = 2; - TASK_PRIO_T kHighLatency = 3; + TASK_PRIO_T kAdmin = 0; /**< Admin task lane */ + TASK_PRIO_T kLongRunning = 1; /**< Long-running task lane */ + TASK_PRIO_T kLowLatency = 2; /**< Low latency task lane */ + TASK_PRIO_T kLongRunningTether = 3; /**< Tethered to low latency workers */ + TASK_PRIO_T kHighLatency = 4; /**< High latency task lane */ }; - /** Used to indicate the amount of work remaining to do when flushing */ struct WorkPending { bool flushing_; diff --git a/hrun/include/hrun/task_registry/task_lib.h b/hrun/include/hrun/task_registry/task_lib.h index 4f0cab2ff..fe9d85a1b 100644 --- a/hrun/include/hrun/task_registry/task_lib.h +++ b/hrun/include/hrun/task_registry/task_lib.h @@ -102,9 +102,11 @@ class TaskLibClient { public: /** Init from existing ID */ - void Init(const TaskStateId &id) { + void Init(const TaskStateId &id, + const QueueId &queue_id) { id_ = id; - queue_id_ = QueueId(id_); + // queue_id_ = QueueId(id_); + queue_id_ = queue_id; } }; diff --git a/hrun/tasks_required/TASK_NAME/include/TASK_NAME/TASK_NAME.h b/hrun/tasks_required/TASK_NAME/include/TASK_NAME/TASK_NAME.h index 9edd34e59..0db31c06e 100644 --- a/hrun/tasks_required/TASK_NAME/include/TASK_NAME/TASK_NAME.h +++ b/hrun/tasks_required/TASK_NAME/include/TASK_NAME/TASK_NAME.h @@ -34,11 +34,7 @@ class Client : public TaskLibClient { const std::string &state_name) { id_ = TaskStateId::GetNull(); QueueManagerInfo &qm = HRUN_CLIENT->server_config_.queue_manager_; - std::vector queue_info = { - {TaskPrio::kAdmin, 1, 1, qm.queue_depth_, 0}, - {TaskPrio::kLongRunning, 1, 1, qm.queue_depth_, QUEUE_LONG_RUNNING}, - {TaskPrio::kLowLatency, qm.max_lanes_, qm.max_lanes_, qm.queue_depth_, QUEUE_LOW_LATENCY} - }; + std::vector queue_info; return HRUN_ADMIN->AsyncCreateTaskState( task_node, domain_id, state_name, id_, queue_info); } @@ -49,8 +45,7 @@ class Client : public TaskLibClient { LPointer task = AsyncCreateRoot(std::forward(args)...); task->Wait(); - id_ = task->id_; - queue_id_ = QueueId(id_); + Init(id_, HRUN_ADMIN->queue_id_); HRUN_CLIENT->DelTask(task); } diff --git a/hrun/tasks_required/hrun_admin/src/hrun_admin.cc b/hrun/tasks_required/hrun_admin/src/hrun_admin.cc index b0547b109..dc10f74ce 100644 --- a/hrun/tasks_required/hrun_admin/src/hrun_admin.cc +++ b/hrun/tasks_required/hrun_admin/src/hrun_admin.cc @@ -88,9 +88,9 @@ class Server : public TaskLib { return; } // Create the task queue for the state - QueueId qid(task->id_); - MultiQueue *queue = HRUN_QM_RUNTIME->CreateQueue( - qid, task->queue_info_->vec()); + // QueueId qid(task->id_); + // MultiQueue *queue = HRUN_QM_RUNTIME->CreateQueue( + // qid, task->queue_info_->vec()); // Allocate the task state task->method_ = Method::kConstruct; HRUN_TASK_REGISTRY->CreateTaskState( @@ -98,7 +98,7 @@ class Server : public TaskLib { state_name.c_str(), task->id_, task); - queue->flags_.SetBits(QUEUE_READY); + // queue->flags_.SetBits(QUEUE_READY); task->method_ = Method::kCreateTaskState; task->SetModuleComplete(); } diff --git a/hrun/tasks_required/proc_queue/include/proc_queue/proc_queue.h b/hrun/tasks_required/proc_queue/include/proc_queue/proc_queue.h index abeca1d7f..c7e049ba7 100644 --- a/hrun/tasks_required/proc_queue/include/proc_queue/proc_queue.h +++ b/hrun/tasks_required/proc_queue/include/proc_queue/proc_queue.h @@ -36,12 +36,7 @@ class Client : public TaskLibClient { const std::string &state_name) { id_ = TaskStateId::GetNull(); QueueManagerInfo &qm = HRUN_CLIENT->server_config_.queue_manager_; - std::vector queue_info = { - {TaskPrio::kAdmin, 1, 1, qm.queue_depth_, 0}, - {TaskPrio::kLongRunning, 1, 1, qm.queue_depth_, QUEUE_LONG_RUNNING}, - // TODO(llogan): Specify different depth for proc queue - {TaskPrio::kLowLatency, qm.max_lanes_, qm.max_lanes_, 16, QUEUE_LOW_LATENCY} - }; + std::vector queue_info; return HRUN_ADMIN->AsyncCreateTaskState( task_node, domain_id, state_name, id_, queue_info); } @@ -52,8 +47,7 @@ class Client : public TaskLibClient { LPointer task = AsyncCreateRoot(std::forward(args)...); task->Wait(); - id_ = task->id_; - queue_id_ = QueueId(id_); + Init(id_, HRUN_ADMIN->queue_id_); HRUN_CLIENT->DelTask(task); } diff --git a/hrun/tasks_required/remote_queue/include/remote_queue/remote_queue.h b/hrun/tasks_required/remote_queue/include/remote_queue/remote_queue.h index d048bb4bb..e2c135b9a 100644 --- a/hrun/tasks_required/remote_queue/include/remote_queue/remote_queue.h +++ b/hrun/tasks_required/remote_queue/include/remote_queue/remote_queue.h @@ -32,13 +32,7 @@ class Client : public TaskLibClient { const TaskStateId &state_id) { id_ = state_id; QueueManagerInfo &qm = HRUN_CLIENT->server_config_.queue_manager_; - // NOTE(llogan): 32x queue depth b/c default num rpc threads is 32 - std::vector queue_info = { - {TaskPrio::kAdmin, 1, 1, qm.queue_depth_, 0}, - {TaskPrio::kLongRunning, 1, 1, qm.queue_depth_, QUEUE_LONG_RUNNING}, - // {qm.max_lanes_, qm.max_lanes_, qm.queue_depth_, QUEUE_LOW_LATENCY} - {TaskPrio::kLowLatency, 1, 1, qm.queue_depth_, QUEUE_LOW_LATENCY}, - }; + std::vector queue_info; return HRUN_ADMIN->AsyncCreateTaskState( task_node, domain_id, state_name, id_, queue_info); } @@ -49,8 +43,7 @@ class Client : public TaskLibClient { LPointer task = AsyncCreateRoot(std::forward(args)...); task->Wait(); - id_ = task->id_; - queue_id_ = QueueId(id_); + Init(id_, HRUN_ADMIN->queue_id_); HRUN_CLIENT->DelTask(task); } diff --git a/hrun/tasks_required/small_message/include/small_message/small_message.h b/hrun/tasks_required/small_message/include/small_message/small_message.h index 05d10e5ac..bd40553b1 100644 --- a/hrun/tasks_required/small_message/include/small_message/small_message.h +++ b/hrun/tasks_required/small_message/include/small_message/small_message.h @@ -25,14 +25,10 @@ class Client : public TaskLibClient { const std::string &state_name) { id_ = TaskStateId::GetNull(); QueueManagerInfo &qm = HRUN_CLIENT->server_config_.queue_manager_; - std::vector queue_info = { - {TaskPrio::kAdmin, 1, 1, qm.queue_depth_, 0}, - {TaskPrio::kLongRunning, 1, 1, qm.queue_depth_, QUEUE_LONG_RUNNING}, - {TaskPrio::kLowLatency, qm.max_lanes_, qm.max_lanes_, qm.queue_depth_, QUEUE_LOW_LATENCY} - }; + std::vector queue_info; id_ = HRUN_ADMIN->CreateTaskStateRoot( domain_id, state_name, id_, queue_info); - queue_id_ = QueueId(id_); + Init(id_, HRUN_ADMIN->queue_id_); } /** Destroy state + queue */ diff --git a/hrun/tasks_required/worch_proc_round_robin/include/worch_proc_round_robin/worch_proc_round_robin.h b/hrun/tasks_required/worch_proc_round_robin/include/worch_proc_round_robin/worch_proc_round_robin.h index 4673369cb..7dc764877 100644 --- a/hrun/tasks_required/worch_proc_round_robin/include/worch_proc_round_robin/worch_proc_round_robin.h +++ b/hrun/tasks_required/worch_proc_round_robin/include/worch_proc_round_robin/worch_proc_round_robin.h @@ -24,9 +24,7 @@ class Client : public TaskLibClient { void CreateRoot(const DomainId &domain_id, const std::string &state_name) { id_ = TaskStateId::GetNull(); - std::vector queue_info = { - {TaskPrio::kAdmin, 1, 1, 4, 0}, - }; + std::vector queue_info; id_ = HRUN_ADMIN->CreateTaskStateRoot( domain_id, state_name, id_, queue_info); } diff --git a/hrun/tasks_required/worch_queue_round_robin/include/worch_queue_round_robin/worch_queue_round_robin.h b/hrun/tasks_required/worch_queue_round_robin/include/worch_queue_round_robin/worch_queue_round_robin.h index d4a161f87..cdffb5318 100644 --- a/hrun/tasks_required/worch_queue_round_robin/include/worch_queue_round_robin/worch_queue_round_robin.h +++ b/hrun/tasks_required/worch_queue_round_robin/include/worch_queue_round_robin/worch_queue_round_robin.h @@ -24,11 +24,10 @@ class Client : public TaskLibClient { void CreateRoot(const DomainId &domain_id, const std::string &state_name) { id_ = TaskStateId::GetNull(); - std::vector queue_info = { - {TaskPrio::kAdmin, 1, 1, 4, 0}, - }; + std::vector queue_info; id_ = HRUN_ADMIN->CreateTaskStateRoot( domain_id, state_name, id_, queue_info); + Init(id_, HRUN_ADMIN->queue_id_); } /** Destroy task state */ diff --git a/tasks/bdev/include/bdev/bdev.h b/tasks/bdev/include/bdev/bdev.h index 8b26d056b..2d6f826cf 100644 --- a/tasks/bdev/include/bdev/bdev.h +++ b/tasks/bdev/include/bdev/bdev.h @@ -49,12 +49,7 @@ class Client : public TaskLibClient { id_ = TaskStateId::GetNull(); CopyDevInfo(dev_info); QueueManagerInfo &qm = HRUN_CLIENT->server_config_.queue_manager_; - std::vector queue_info = { - {TaskPrio::kAdmin, 1, 1, qm.queue_depth_, 0}, - {TaskPrio::kLongRunning, 1, 1, qm.queue_depth_, QUEUE_LONG_RUNNING}, - {TaskPrio::kLowLatency, qm.max_lanes_, qm.max_lanes_, qm.queue_depth_, QUEUE_LOW_LATENCY}, - {TaskPrio::kHighLatency, qm.max_lanes_, qm.max_lanes_, qm.queue_depth_, 0} - }; + std::vector queue_info; return HRUN_ADMIN->AsyncCreateTaskState( task_node, domain_id, state_name, lib_name, id_, queue_info, dev_info); @@ -62,7 +57,7 @@ class Client : public TaskLibClient { void AsyncCreateComplete(ConstructTask *task) { if (task->IsModuleComplete()) { id_ = task->id_; - queue_id_ = QueueId(id_); + Init(id_, HRUN_ADMIN->queue_id_); monitor_task_ = AsyncStatBdev(task->task_node_ + 1, 100).ptr_; HRUN_CLIENT->DelTask(task); } diff --git a/tasks/data_stager/include/data_stager/data_stager.h b/tasks/data_stager/include/data_stager/data_stager.h index a0cd397dc..ec524856c 100644 --- a/tasks/data_stager/include/data_stager/data_stager.h +++ b/tasks/data_stager/include/data_stager/data_stager.h @@ -26,11 +26,7 @@ class Client : public TaskLibClient { const TaskStateId &blob_mdm) { id_ = TaskStateId::GetNull(); QueueManagerInfo &qm = HRUN_CLIENT->server_config_.queue_manager_; - std::vector queue_info = { - {TaskPrio::kAdmin, 1, 1, qm.queue_depth_, 0}, - {TaskPrio::kLongRunning, 1, 1, qm.queue_depth_, QUEUE_LONG_RUNNING}, - {TaskPrio::kLowLatency, qm.max_lanes_, qm.max_lanes_, qm.queue_depth_, QUEUE_LOW_LATENCY} - }; + std::vector queue_info; return HRUN_ADMIN->AsyncCreateTaskState( task_node, domain_id, state_name, id_, queue_info, blob_mdm); } @@ -42,7 +38,7 @@ class Client : public TaskLibClient { AsyncCreateRoot(std::forward(args)...); task->Wait(); id_ = task->id_; - queue_id_ = QueueId(id_); + Init(id_, HRUN_ADMIN->queue_id_); HRUN_CLIENT->DelTask(task); } diff --git a/tasks/data_stager/src/data_stager.cc b/tasks/data_stager/src/data_stager.cc index 710658748..08275d05a 100644 --- a/tasks/data_stager/src/data_stager.cc +++ b/tasks/data_stager/src/data_stager.cc @@ -24,7 +24,7 @@ class Server : public TaskLib { void Construct(ConstructTask *task, RunContext &rctx) { task->Deserialize(); url_map_.resize(HRUN_QM_RUNTIME->max_lanes_); - blob_mdm_.Init(task->blob_mdm_); + blob_mdm_.Init(task->blob_mdm_, HRUN_ADMIN->queue_id_); HILOG(kInfo, "(node {}) BLOB MDM: {}", HRUN_CLIENT->node_id_, blob_mdm_.id_); task->SetModuleComplete(); } diff --git a/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm.h b/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm.h index 0e10d9b24..6f911e863 100644 --- a/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm.h +++ b/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm.h @@ -19,25 +19,14 @@ class Client : public TaskLibClient { /** Destructor */ ~Client() = default; - /** Initialize directly using TaskStateId */ - void Init(const TaskStateId &id) { - id_ = id; - queue_id_ = QueueId(id_); - } - /** Create a hermes_blob_mdm */ HSHM_ALWAYS_INLINE - LPointer AsyncCreate(const TaskNode &task_node, - const DomainId &domain_id, - const std::string &state_name) { + LPointer AsyncCreate(const TaskNode &task_node, + const DomainId &domain_id, + const std::string &state_name) { id_ = TaskStateId::GetNull(); QueueManagerInfo &qm = HRUN_CLIENT->server_config_.queue_manager_; - std::vector queue_info = { - {TaskPrio::kAdmin, 1, 1, qm.queue_depth_, 0}, - {TaskPrio::kLongRunning, qm.max_lanes_, qm.max_lanes_, qm.queue_depth_, - QUEUE_LONG_RUNNING | QUEUE_TETHERED, TaskPrio::kLowLatency}, - {TaskPrio::kLowLatency, qm.max_lanes_, qm.max_lanes_, qm.queue_depth_, QUEUE_LOW_LATENCY} - }; + std::vector queue_info; return HRUN_ADMIN->AsyncCreateTaskState( task_node, domain_id, state_name, id_, queue_info); } @@ -82,7 +71,7 @@ class Client : public TaskLibClient { const TaskStateId &stager_mdm, const TaskStateId &op_mdm) { LPointer> push_task = - AsyncSetBucketMdmRoot(domain_id, blob_mdm, stager_mdm, op_mdm); + AsyncSetBucketMdmRoot(domain_id, blob_mdm, stager_mdm, op_mdm); push_task->Wait(); HRUN_CLIENT->DelTask(push_task); } @@ -102,7 +91,7 @@ class Client : public TaskLibClient { } BlobId GetOrCreateBlobIdRoot(TagId tag_id, const hshm::charbuf &blob_name) { LPointer> push_task = - AsyncGetOrCreateBlobIdRoot(tag_id, blob_name); + AsyncGetOrCreateBlobIdRoot(tag_id, blob_name); push_task->Wait(); GetOrCreateBlobIdTask *task = push_task->get(); BlobId blob_id = task->blob_id_; @@ -165,7 +154,7 @@ class Client : public TaskLibClient { Context ctx = Context(), u32 flags = 0) { LPointer> push_task = - AsyncGetBlobRoot(tag_id, hshm::charbuf(""), blob_id, off, data_size, data, ctx, flags); + AsyncGetBlobRoot(tag_id, hshm::charbuf(""), blob_id, off, data_size, data, ctx, flags); push_task->Wait(); GetBlobTask *task = push_task->get(); data = task->data_; @@ -216,7 +205,7 @@ class Client : public TaskLibClient { const BlobId &blob_id, const TagId &tag) { LPointer> push_task = - AsyncTagBlobRoot(tag_id, blob_id, tag); + AsyncTagBlobRoot(tag_id, blob_id, tag); push_task->Wait(); HRUN_CLIENT->DelTask(push_task); } @@ -238,7 +227,7 @@ class Client : public TaskLibClient { const BlobId &blob_id, const TagId &tag) { LPointer> push_task = - AsyncBlobHasTagRoot(tag_id, blob_id, tag); + AsyncBlobHasTagRoot(tag_id, blob_id, tag); push_task->Wait(); BlobHasTagTask *task = push_task->get(); bool has_tag = task->has_tag_; @@ -262,7 +251,7 @@ class Client : public TaskLibClient { BlobId GetBlobIdRoot(const TagId &tag_id, const hshm::charbuf &blob_name) { LPointer> push_task = - AsyncGetBlobIdRoot(tag_id, blob_name); + AsyncGetBlobIdRoot(tag_id, blob_name); push_task->Wait(); GetBlobIdTask *task = push_task->get(); BlobId blob_id = task->blob_id_; @@ -285,7 +274,7 @@ class Client : public TaskLibClient { std::string GetBlobNameRoot(const TagId &tag_id, const BlobId &blob_id) { LPointer> push_task = - AsyncGetBlobNameRoot(tag_id, blob_id); + AsyncGetBlobNameRoot(tag_id, blob_id); push_task->Wait(); GetBlobNameTask *task = push_task->get(); std::string blob_name = task->blob_name_->str(); @@ -311,7 +300,7 @@ class Client : public TaskLibClient { const hshm::charbuf &blob_name, const BlobId &blob_id) { LPointer> push_task = - AsyncGetBlobSizeRoot(tag_id, blob_name, blob_id); + AsyncGetBlobSizeRoot(tag_id, blob_name, blob_id); push_task->Wait(); GetBlobSizeTask *task = push_task->get(); size_t size = task->size_; @@ -334,7 +323,7 @@ class Client : public TaskLibClient { float GetBlobScoreRoot(const TagId &tag_id, const BlobId &blob_id) { LPointer> push_task = - AsyncGetBlobScoreRoot(tag_id, blob_id); + AsyncGetBlobScoreRoot(tag_id, blob_id); push_task->Wait(); GetBlobScoreTask *task = push_task->get(); float score = task->score_; @@ -383,7 +372,7 @@ class Client : public TaskLibClient { const BlobId &blob_id, const hshm::charbuf &new_blob_name) { LPointer> push_task = - AsyncRenameBlobRoot(tag_id, blob_id, new_blob_name); + AsyncRenameBlobRoot(tag_id, blob_id, new_blob_name); push_task->Wait(); HRUN_CLIENT->DelTask(push_task); } @@ -405,7 +394,7 @@ class Client : public TaskLibClient { const BlobId &blob_id, size_t new_size) { LPointer> push_task = - AsyncTruncateBlobRoot(tag_id, blob_id, new_size); + AsyncTruncateBlobRoot(tag_id, blob_id, new_size); push_task->Wait(); HRUN_CLIENT->DelTask(push_task); } @@ -427,7 +416,7 @@ class Client : public TaskLibClient { const BlobId &blob_id, bool update_size = true) { LPointer> push_task = - AsyncDestroyBlobRoot(tag_id, blob_id, update_size); + AsyncDestroyBlobRoot(tag_id, blob_id, update_size); push_task->Wait(); HRUN_CLIENT->DelTask(push_task); } @@ -445,13 +434,13 @@ class Client : public TaskLibClient { * Get all blob metadata * */ void AsyncPollBlobMetadataConstruct(PollBlobMetadataTask *task, - const TaskNode &task_node) { + const TaskNode &task_node) { HRUN_CLIENT->ConstructTask( task, task_node, id_); } std::vector PollBlobMetadataRoot() { LPointer> push_task = - AsyncPollBlobMetadataRoot(); + AsyncPollBlobMetadataRoot(); push_task->Wait(); PollBlobMetadataTask *task = push_task->get(); std::vector blob_mdms = @@ -471,7 +460,7 @@ class Client : public TaskLibClient { } std::vector PollTargetMetadataRoot() { LPointer> push_task = - AsyncPollTargetMetadataRoot(); + AsyncPollTargetMetadataRoot(); push_task->Wait(); PollTargetMetadataTask *task = push_task->get(); std::vector target_mdms = @@ -484,4 +473,4 @@ class Client : public TaskLibClient { } // namespace hrun -#endif // HRUN_hermes_blob_mdm_H_ +#endif // HRUN_hermes_blob_mdm_H_ \ No newline at end of file diff --git a/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h b/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h index e7c662ecb..77eff2907 100644 --- a/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h +++ b/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h @@ -1162,7 +1162,7 @@ struct FlushDataTask : public Task, TaskFlags { // Initialize task task_node_ = task_node; lane_hash_ = 0; - prio_ = TaskPrio::kLongRunning; + prio_ = TaskPrio::kLongRunningTether; task_state_ = state_id; method_ = Method::kFlushData; task_flags_.SetBits( diff --git a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc index 56217f89b..a13d3f36b 100644 --- a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc +++ b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc @@ -106,7 +106,7 @@ class Server : public TaskLib { client.id_, client.bandwidth_, client.bw_score_); } fallback_target_ = &targets_.back(); - blob_mdm_.Init(id_); + blob_mdm_.Init(id_, HRUN_ADMIN->queue_id_); HILOG(kInfo, "(node {}) Created Blob MDM", HRUN_CLIENT->node_id_); task->SetModuleComplete(); } @@ -136,9 +136,9 @@ class Server : public TaskLib { * */ void SetBucketMdm(SetBucketMdmTask *task, RunContext &rctx) { if (bkt_mdm_.id_.IsNull()) { - bkt_mdm_.Init(task->bkt_mdm_); - stager_mdm_.Init(task->stager_mdm_); - op_mdm_.Init(task->op_mdm_); + bkt_mdm_.Init(task->bkt_mdm_, HRUN_ADMIN->queue_id_); + stager_mdm_.Init(task->stager_mdm_, HRUN_ADMIN->queue_id_); + op_mdm_.Init(task->op_mdm_, HRUN_ADMIN->queue_id_); flush_task_ = blob_mdm_.AsyncFlushData(task->task_node_ + 1); } task->SetModuleComplete(); diff --git a/tasks/hermes_bucket_mdm/include/hermes_bucket_mdm/hermes_bucket_mdm.h b/tasks/hermes_bucket_mdm/include/hermes_bucket_mdm/hermes_bucket_mdm.h index fac654df6..f462d2218 100644 --- a/tasks/hermes_bucket_mdm/include/hermes_bucket_mdm/hermes_bucket_mdm.h +++ b/tasks/hermes_bucket_mdm/include/hermes_bucket_mdm/hermes_bucket_mdm.h @@ -24,14 +24,10 @@ class Client : public TaskLibClient { const std::string &state_name) { id_ = TaskStateId::GetNull(); QueueManagerInfo &qm = HRUN_CLIENT->server_config_.queue_manager_; - std::vector queue_info = { - {TaskPrio::kAdmin, 1, 1, qm.queue_depth_, 0}, - {TaskPrio::kLongRunning, 1, 1, qm.queue_depth_, QUEUE_LONG_RUNNING}, - {TaskPrio::kLowLatency, qm.max_lanes_, qm.max_lanes_, qm.queue_depth_, QUEUE_LOW_LATENCY} - }; + std::vector queue_info; id_ = HRUN_ADMIN->CreateTaskStateRoot( domain_id, state_name, id_, queue_info); - queue_id_ = QueueId(id_); + Init(id_, HRUN_ADMIN->queue_id_); } /** Destroy task state + queue */ diff --git a/tasks/hermes_bucket_mdm/src/hermes_bucket_mdm.cc b/tasks/hermes_bucket_mdm/src/hermes_bucket_mdm.cc index e0b79bc4c..9c81cacca 100644 --- a/tasks/hermes_bucket_mdm/src/hermes_bucket_mdm.cc +++ b/tasks/hermes_bucket_mdm/src/hermes_bucket_mdm.cc @@ -33,7 +33,7 @@ class Server : public TaskLib { void Construct(ConstructTask *task, RunContext &rctx) { id_alloc_ = 0; node_id_ = HRUN_CLIENT->node_id_; - bkt_mdm_.Init(id_); + bkt_mdm_.Init(id_, HRUN_ADMIN->queue_id_); tag_id_map_.resize(HRUN_QM_RUNTIME->max_lanes_); tag_map_.resize(HRUN_QM_RUNTIME->max_lanes_); task->SetModuleComplete(); @@ -52,8 +52,8 @@ class Server : public TaskLib { * Set the Blob MDM * */ void SetBlobMdm(SetBlobMdmTask *task, RunContext &rctx) { - blob_mdm_.Init(task->blob_mdm_); - stager_mdm_.Init(task->stager_mdm_); + blob_mdm_.Init(task->blob_mdm_, HRUN_ADMIN->queue_id_); + stager_mdm_.Init(task->stager_mdm_, HRUN_ADMIN->queue_id_); task->SetModuleComplete(); } void MonitorSetBlobMdm(u32 mode, SetBlobMdmTask *task, RunContext &rctx) { diff --git a/tasks/hermes_data_op/include/hermes_data_op/hermes_data_op.h b/tasks/hermes_data_op/include/hermes_data_op/hermes_data_op.h index 2f503f6c4..78bc2e82a 100644 --- a/tasks/hermes_data_op/include/hermes_data_op/hermes_data_op.h +++ b/tasks/hermes_data_op/include/hermes_data_op/hermes_data_op.h @@ -28,11 +28,7 @@ class Client : public TaskLibClient { TaskStateId &blob_mdm_id) { id_ = TaskStateId::GetNull(); QueueManagerInfo &qm = HRUN_CLIENT->server_config_.queue_manager_; - std::vector queue_info = { - {TaskPrio::kAdmin, 1, 1, qm.queue_depth_, 0}, - {TaskPrio::kLongRunning, 1, 1, qm.queue_depth_, QUEUE_LONG_RUNNING}, - {TaskPrio::kLowLatency, qm.max_lanes_, qm.max_lanes_, qm.queue_depth_, QUEUE_LOW_LATENCY} - }; + std::vector queue_info; return HRUN_ADMIN->AsyncCreateTaskState( task_node, domain_id, state_name, id_, queue_info, bkt_mdm_id, blob_mdm_id); @@ -45,7 +41,7 @@ class Client : public TaskLibClient { AsyncCreateRoot(std::forward(args)...); task->Wait(); id_ = task->id_; - queue_id_ = QueueId(id_); + Init(id_, HRUN_ADMIN->queue_id_); HRUN_CLIENT->DelTask(task); } diff --git a/tasks/hermes_data_op/src/hermes_data_op.cc b/tasks/hermes_data_op/src/hermes_data_op.cc index 09a20d6ff..2d4bfcae4 100644 --- a/tasks/hermes_data_op/src/hermes_data_op.cc +++ b/tasks/hermes_data_op/src/hermes_data_op.cc @@ -34,9 +34,9 @@ class Server : public TaskLib { /** Construct data operator table */ void Construct(ConstructTask *task, RunContext &rctx) { task->Deserialize(); - bkt_mdm_.Init(task->bkt_mdm_); - blob_mdm_.Init(task->blob_mdm_); - client_.Init(id_); + bkt_mdm_.Init(task->bkt_mdm_, HRUN_ADMIN->queue_id_); + blob_mdm_.Init(task->blob_mdm_, HRUN_ADMIN->queue_id_); + client_.Init(id_, HRUN_ADMIN->queue_id_); op_id_map_["min"] = 0; op_id_map_["max"] = 1; op_graphs_.resize(HRUN_QM_RUNTIME->max_lanes_); diff --git a/tasks/hermes_mdm/include/hermes_mdm/hermes_mdm.h b/tasks/hermes_mdm/include/hermes_mdm/hermes_mdm.h index c3deddb71..283670594 100644 --- a/tasks/hermes_mdm/include/hermes_mdm/hermes_mdm.h +++ b/tasks/hermes_mdm/include/hermes_mdm/hermes_mdm.h @@ -23,12 +23,10 @@ class Client : public TaskLibClient { void CreateRoot(const DomainId &domain_id, const std::string &state_name) { id_ = TaskStateId::GetNull(); - std::vector queue_info = { - {TaskPrio::kAdmin, 1, 1, 1, 0}, - }; + std::vector queue_info; id_ = HRUN_ADMIN->CreateTaskStateRoot( domain_id, state_name, id_, queue_info); - queue_id_ = QueueId(id_); + Init(id_, HRUN_ADMIN->queue_id_); } /** Destroy task state + queue */ From f2737ee65d73e3ba03ae66bf5afe6d9c608132a5 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Fri, 22 Dec 2023 08:21:51 -0600 Subject: [PATCH 15/19] Add process queue depth to hrun config --- hrun/config/hrun_server_default.yaml | 2 ++ hrun/include/hrun/config/config_server.h | 2 ++ .../hrun/config/config_server_default.h | 2 ++ .../queue_manager/queue_manager_runtime.h | 19 +++++++++++++++++-- hrun/src/config_server.cc | 10 ++++++++-- .../src/worch_queue_round_robin.cc | 6 +++--- 6 files changed, 34 insertions(+), 7 deletions(-) diff --git a/hrun/config/hrun_server_default.yaml b/hrun/config/hrun_server_default.yaml index c4578eb36..26a95007b 100644 --- a/hrun/config/hrun_server_default.yaml +++ b/hrun/config/hrun_server_default.yaml @@ -9,6 +9,8 @@ work_orchestrator: ### Queue Manager settings queue_manager: + # The default depth of process queue + proc_queue_depth: 8192 # The default depth of allocated queues queue_depth: 100000 # The maximum number of lanes per queue diff --git a/hrun/include/hrun/config/config_server.h b/hrun/include/hrun/config/config_server.h index bdda384a1..700daa6d7 100644 --- a/hrun/include/hrun/config/config_server.h +++ b/hrun/include/hrun/config/config_server.h @@ -36,6 +36,8 @@ struct WorkOrchestratorInfo { struct QueueManagerInfo { /** Maximum depth of IPC queues */ u32 queue_depth_; + /** Maximum depth of process queue */ + u32 proc_queue_depth_; /** Maximum number of lanes per IPC queue */ u32 max_lanes_; /** Maximum number of allocatable IPC queues */ diff --git a/hrun/include/hrun/config/config_server_default.h b/hrun/include/hrun/config/config_server_default.h index 5e3b8287e..c511a15d1 100644 --- a/hrun/include/hrun/config/config_server_default.h +++ b/hrun/include/hrun/config/config_server_default.h @@ -12,6 +12,8 @@ const inline char* kHrunServerDefaultConfigStr = "\n" "### Queue Manager settings\n" "queue_manager:\n" +" # The default depth of process queue\n" +" proc_queue_depth: 8192\n" " # The default depth of allocated queues\n" " queue_depth: 100000\n" " # The maximum number of lanes per queue\n" diff --git a/hrun/include/hrun/queue_manager/queue_manager_runtime.h b/hrun/include/hrun/queue_manager/queue_manager_runtime.h index 2c8e7de0f..ffe08ce28 100644 --- a/hrun/include/hrun/queue_manager/queue_manager_runtime.h +++ b/hrun/include/hrun/queue_manager/queue_manager_runtime.h @@ -63,9 +63,24 @@ class QueueManagerRuntime : public QueueManager { QUEUE_LONG_RUNNING | QUEUE_TETHERED, TaskPrio::kLowLatency}, {TaskPrio::kHighLatency, qm.max_lanes_, qm.max_lanes_, qm.queue_depth_, QUEUE_LOW_LATENCY} }; - queue = CreateQueue(admin_queue_, queue_info); + queue = CreateQueue(admin_queue_, { + {TaskPrio::kAdmin, 1, 1, qm.queue_depth_, QUEUE_UNORDERED}, + {TaskPrio::kLongRunning, 1, 1, qm.queue_depth_, QUEUE_LONG_RUNNING}, + {TaskPrio::kLowLatency, qm.max_lanes_, qm.max_lanes_, qm.queue_depth_, QUEUE_LOW_LATENCY}, + {TaskPrio::kLongRunningTether, qm.max_lanes_, qm.max_lanes_, qm.queue_depth_, + QUEUE_LONG_RUNNING | QUEUE_TETHERED, TaskPrio::kLowLatency}, + {TaskPrio::kHighLatency, qm.max_lanes_, qm.max_lanes_, qm.queue_depth_, QUEUE_LOW_LATENCY} + }); queue->flags_.SetBits(QUEUE_READY); - queue = CreateQueue(process_queue_, queue_info); + u32 depth = qm.proc_queue_depth_; + queue = CreateQueue(process_queue_, { + {TaskPrio::kAdmin, 1, 1, depth, QUEUE_UNORDERED}, + {TaskPrio::kLongRunning, 1, 1, depth, QUEUE_LONG_RUNNING}, + {TaskPrio::kLowLatency, qm.max_lanes_, qm.max_lanes_, depth, QUEUE_LOW_LATENCY}, + {TaskPrio::kLongRunningTether, qm.max_lanes_, qm.max_lanes_, depth, + QUEUE_LONG_RUNNING | QUEUE_TETHERED, TaskPrio::kLowLatency}, + {TaskPrio::kHighLatency, qm.max_lanes_, qm.max_lanes_, depth, QUEUE_LOW_LATENCY} + }); queue->flags_.SetBits(QUEUE_READY); } diff --git a/hrun/src/config_server.cc b/hrun/src/config_server.cc index 1e21ad9e6..1c50db717 100644 --- a/hrun/src/config_server.cc +++ b/hrun/src/config_server.cc @@ -39,6 +39,10 @@ void ServerConfig::ParseQueueManager(YAML::Node yaml_conf) { if (yaml_conf["queue_depth"]) { queue_manager_.queue_depth_ = yaml_conf["queue_depth"].as(); } + if (yaml_conf["proc_queue_depth"]) { + queue_manager_.proc_queue_depth_ = + yaml_conf["proc_queue_depth"].as(); + } if (yaml_conf["max_lanes"]) { queue_manager_.max_lanes_ = yaml_conf["max_lanes"].as(); } @@ -49,8 +53,10 @@ void ServerConfig::ParseQueueManager(YAML::Node yaml_conf) { queue_manager_.shm_allocator_ = yaml_conf["shm_allocator"].as(); } if (yaml_conf["shm_name"]) { - queue_manager_.shm_name_ = hshm::ConfigParse::ExpandPath(yaml_conf["shm_name"].as()); - queue_manager_.data_shm_name_ = hshm::ConfigParse::ExpandPath(queue_manager_.shm_name_ + "_data"); + queue_manager_.shm_name_ = + hshm::ConfigParse::ExpandPath(yaml_conf["shm_name"].as()); + queue_manager_.data_shm_name_ = + hshm::ConfigParse::ExpandPath(queue_manager_.shm_name_ + "_data"); } if (yaml_conf["shm_size"]) { queue_manager_.shm_size_ = hshm::ConfigParse::ParseSize( diff --git a/hrun/tasks_required/worch_queue_round_robin/src/worch_queue_round_robin.cc b/hrun/tasks_required/worch_queue_round_robin/src/worch_queue_round_robin.cc index d058b8673..2dbd435e8 100644 --- a/hrun/tasks_required/worch_queue_round_robin/src/worch_queue_round_robin.cc +++ b/hrun/tasks_required/worch_queue_round_robin/src/worch_queue_round_robin.cc @@ -59,7 +59,7 @@ class Server : public TaskLib { Worker &worker = *HRUN_WORK_ORCHESTRATOR->workers_[tether_lane.worker_id_]; worker.PollQueues({WorkEntry(lane_group.prio_, lane_id, &queue)}); lane.worker_id_ = worker.id_; - HILOG(kDebug, "(node {}) Scheduling the queue {} (prio {}, lane {}, worker {})", + HILOG(kInfo, "(node {}) Scheduling the queue {} (prio {}, lane {}, worker {})", HRUN_CLIENT->node_id_, queue.id_, lane_group.prio_, lane_id, worker.id_); } else if (lane_group.IsLowLatency()) { u32 worker_off = count_lowlat_ % HRUN_WORK_ORCHESTRATOR->dworkers_.size(); @@ -67,14 +67,14 @@ class Server : public TaskLib { Worker &worker = *HRUN_WORK_ORCHESTRATOR->dworkers_[worker_off]; worker.PollQueues({WorkEntry(lane_group.prio_, lane_id, &queue)}); lane.worker_id_ = worker.id_; - HILOG(kDebug, "(node {}) Scheduling the queue {} (prio {}, lane {}, worker {})", + HILOG(kInfo, "(node {}) Scheduling the queue {} (prio {}, lane {}, worker {})", HRUN_CLIENT->node_id_, queue.id_, lane_group.prio_, lane_id, worker.id_); } else { u32 worker_off = count_highlat_ % HRUN_WORK_ORCHESTRATOR->oworkers_.size(); count_highlat_ += 1; Worker &worker = *HRUN_WORK_ORCHESTRATOR->oworkers_[worker_off]; worker.PollQueues({WorkEntry(lane_group.prio_, lane_id, &queue)}); - HILOG(kDebug, "(node {}) Scheduling the queue {} (prio {}, lane {}, worker {})", + HILOG(kInfo, "(node {}) Scheduling the queue {} (prio {}, lane {}, worker {})", HRUN_CLIENT->node_id_, queue.id_, lane_group.prio_, lane_id, worker_off); lane.worker_id_ = worker.id_; } From d0364e9f69e66856032a6e0c39298af9d8402092 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Fri, 22 Dec 2023 08:52:56 -0600 Subject: [PATCH 16/19] Add back affiner. Add stack cache. --- hrun/include/hrun/task_registry/task.h | 5 +- .../hrun/work_orchestrator/scheduler.h | 2 +- hrun/include/hrun/work_orchestrator/worker.h | 56 +++++++++++++------ hrun/src/work_orchestrator.cc | 2 +- .../src/worch_proc_round_robin.cc | 2 +- 5 files changed, 45 insertions(+), 22 deletions(-) diff --git a/hrun/include/hrun/task_registry/task.h b/hrun/include/hrun/task_registry/task.h index 47813b083..eac0704a1 100644 --- a/hrun/include/hrun/task_registry/task.h +++ b/hrun/include/hrun/task_registry/task.h @@ -250,10 +250,9 @@ struct WorkPending { /** Context passed to the Run method of a task */ struct RunContext { - u32 lane_id_; /**< The lane id of the task */ + u32 lane_id_; /**< The lane id of the task */ bctx::transfer_t jmp_; /**< Current execution state of the task (runtime) */ - size_t stack_size_ = KILOBYTES(64); /**< The size of the stack for the task (runtime) */ - void *stack_ptr_; /**< The pointer to the stack (runtime) */ + void *stack_ptr_; /**< The pointer to the stack (runtime) */ TaskLib *exec_; WorkPending *flush_; diff --git a/hrun/include/hrun/work_orchestrator/scheduler.h b/hrun/include/hrun/work_orchestrator/scheduler.h index 56b69dd76..29aa7b625 100644 --- a/hrun/include/hrun/work_orchestrator/scheduler.h +++ b/hrun/include/hrun/work_orchestrator/scheduler.h @@ -43,7 +43,7 @@ struct ScheduleTask : public Task, TaskFlags { task_state_ = state_id; method_ = SchedulerMethod::kSchedule; task_flags_.SetBits(TASK_LONG_RUNNING | TASK_REMOTE_DEBUG_MARK); - SetPeriodMs(5); + SetPeriodMs(250); domain_id_ = domain_id; // Custom params diff --git a/hrun/include/hrun/work_orchestrator/worker.h b/hrun/include/hrun/work_orchestrator/worker.h index 18e7555c1..148aa5786 100644 --- a/hrun/include/hrun/work_orchestrator/worker.h +++ b/hrun/include/hrun/work_orchestrator/worker.h @@ -151,18 +151,21 @@ class Worker { u32 numa_node_; // TODO(llogan): track NUMA affinity ABT_xstream xstream_; std::vector work_queue_; /**< The set of queues to poll */ - /** A set of queues to begin polling in a worker */ + /**< A set of queues to begin polling in a worker */ hshm::spsc_queue> poll_queues_; - /** A set of queues to stop polling in a worker */ + /**< A set of queues to stop polling in a worker */ hshm::spsc_queue> relinquish_queues_; - size_t sleep_us_; /** Time the worker should sleep after a run */ - u32 retries_; /** The number of times to repeat the internal run loop before sleeping */ - bitfield32_t flags_; /** Worker metadata flags */ + size_t sleep_us_; /**< Time the worker should sleep after a run */ + u32 retries_; /**< The number of times to repeat the internal run loop before sleeping */ + bitfield32_t flags_; /**< Worker metadata flags */ std::unordered_map - group_map_; /** Determine if a task can be executed right now */ - hshm::charbuf group_; /** The current group */ - WorkPending flush_; /** Info needed for flushing ops */ - hshm::Timepoint now_; /** The current timepoint */ + group_map_; /**< Determine if a task can be executed right now */ + hshm::charbuf group_; /**< The current group */ + WorkPending flush_; /**< Info needed for flushing ops */ + hshm::Timepoint now_; /**< The current timepoint */ + hshm::spsc_queue stacks_; /**< Cache of stacks for tasks */ + int num_stacks_ = 256; /**< Number of stacks */ + int stack_size_ = KILOBYTES(64); public: /**=============================================================== @@ -184,6 +187,10 @@ class Worker { group_.resize(512); group_.resize(0); xstream_ = xstream; + stacks_.Resize(num_stacks_); + for (int i = 0; i < 16; ++i) { + stacks_.emplace(malloc(stack_size_)); + } /* int ret = ABT_thread_create_on_xstream(xstream, [](void *args) { ((Worker*)args)->Loop(); }, this, ABT_THREAD_ATTR_NULL, &tl_thread_); @@ -347,6 +354,23 @@ class Worker { } + /** Allocate a stack for a task */ + void* AllocateStack() { + void *stack; + if (!stacks_.pop(stack).IsNull()) { + return stack; + } + return malloc(stack_size_); + } + + /** Free a stack */ + void FreeStack(void *stack) { + if(!stacks_.emplace(stack).IsNull()) { + return; + } + stacks_.Resize(stacks_.size() + num_stacks_); + } + /** Run an iteration over a particular queue */ HSHM_ALWAYS_INLINE void PollGrouped(WorkEntry &work_entry, bool flushing) { @@ -421,21 +445,21 @@ class Worker { task->UnsetCoroutine(); } else if (task->IsCoroutine()) { if (!task->IsStarted()) { - rctx.stack_ptr_ = malloc(rctx.stack_size_); + rctx.stack_ptr_ = AllocateStack(); if (rctx.stack_ptr_ == nullptr) { HILOG(kFatal, "The stack pointer of size {} is NULL", - rctx.stack_size_, rctx.stack_ptr_); + stack_size_, rctx.stack_ptr_); } rctx.jmp_.fctx = bctx::make_fcontext( - (char*)rctx.stack_ptr_ + rctx.stack_size_, - rctx.stack_size_, &Worker::RunCoroutine); + (char*)rctx.stack_ptr_ + stack_size_, + stack_size_, &Worker::RunCoroutine); task->SetStarted(); } rctx.jmp_ = bctx::jump_fcontext(rctx.jmp_.fctx, task); if (!task->IsStarted()) { rctx.jmp_.fctx = bctx::make_fcontext( - (char*)rctx.stack_ptr_ + rctx.stack_size_, - rctx.stack_size_, &Worker::RunCoroutine); + (char*)rctx.stack_ptr_ + stack_size_, + stack_size_, &Worker::RunCoroutine); task->SetStarted(); } } else { @@ -450,7 +474,7 @@ class Worker { // HRUN_CLIENT->node_id_, task->task_node_, task->task_state_, id_); entry->complete_ = true; if (task->IsCoroutine()) { - free(rctx.stack_ptr_); + FreeStack(rctx.stack_ptr_); } RemoveTaskGroup(task, exec, work_entry.lane_id_, is_remote); EndTask(lane, exec, task, off); diff --git a/hrun/src/work_orchestrator.cc b/hrun/src/work_orchestrator.cc index f8abfd370..60764b184 100644 --- a/hrun/src/work_orchestrator.cc +++ b/hrun/src/work_orchestrator.cc @@ -134,7 +134,7 @@ void WorkOrchestrator::DedicateCores() { affiner.IgnorePids(worker_pids); affiner.SetCpus(cpu_ids); int count = affiner.AffineAll(); - HILOG(kInfo, "Affining {} processes to {} cores", count, cpu_ids.size()); + // HILOG(kInfo, "Affining {} processes to {} cores", count, cpu_ids.size()); } } // namespace hrun diff --git a/hrun/tasks_required/worch_proc_round_robin/src/worch_proc_round_robin.cc b/hrun/tasks_required/worch_proc_round_robin/src/worch_proc_round_robin.cc index 6c6db3f94..44e8a9e5d 100644 --- a/hrun/tasks_required/worch_proc_round_robin/src/worch_proc_round_robin.cc +++ b/hrun/tasks_required/worch_proc_round_robin/src/worch_proc_round_robin.cc @@ -34,7 +34,7 @@ class Server : public TaskLib { /** Schedule running processes */ void Schedule(ScheduleTask *task, RunContext &rctx) { - // HRUN_WORK_ORCHESTRATOR->DedicateCores(); + HRUN_WORK_ORCHESTRATOR->DedicateCores(); } void MonitorSchedule(u32 mode, ScheduleTask *task, RunContext &rctx) { } From 449fae3f1236f186d8fd12aa7ea6ac8981ad37d5 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Fri, 22 Dec 2023 20:00:35 -0600 Subject: [PATCH 17/19] API bench and latency test --- benchmark/hermes_api_bench.cc | 4 +--- benchmark/test_latency.cc | 4 ++-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/benchmark/hermes_api_bench.cc b/benchmark/hermes_api_bench.cc index 4622ad2af..47c2f8547 100644 --- a/benchmark/hermes_api_bench.cc +++ b/benchmark/hermes_api_bench.cc @@ -147,7 +147,6 @@ void CreateBucketTest(int nprocs, int rank, MpiTimer t(MPI_COMM_WORLD); t.Resume(); hapi::Context ctx; - std::unordered_map mdm_; for (size_t i = 0; i < bkts_per_rank; ++i) { int bkt_name_int = rank * bkts_per_rank + i; std::string bkt_name = std::to_string(bkt_name_int); @@ -242,8 +241,7 @@ int main(int argc, char **argv) { MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &nprocs); - TRANSPARENT_HRUN(); - HERMES->ClientInit(); + TRANSPARENT_HERMES(); // Get mode REQUIRE_ARGC_GE(2) diff --git a/benchmark/test_latency.cc b/benchmark/test_latency.cc index ab3cb3dc1..af1636956 100644 --- a/benchmark/test_latency.cc +++ b/benchmark/test_latency.cc @@ -270,8 +270,8 @@ TEST_CASE("TestRoundTripLatency") { size_t ops = (1 << 20); // size_t ops = 1024; for (size_t i = 0; i < ops; ++i) { - client.MdRoot(hrun::DomainId::GetLocal()); - // client.MdPushRoot(hrun::DomainId::GetLocal()); + // client.MdRoot(hrun::DomainId::GetLocal()); + client.MdPushRoot(hrun::DomainId::GetLocal()); } t.Pause(); From b0fa2efba6bac5f66aa0b425db4a195e40f744ab Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Tue, 26 Dec 2023 17:19:11 -0600 Subject: [PATCH 18/19] Don't fatal if state not found --- hrun/include/hrun/api/hrun_client.h | 16 ++++++++++++---- hrun/include/hrun/task_registry/task_lib.h | 9 +++++---- hrun/include/hrun/work_orchestrator/worker.h | 6 +++--- 3 files changed, 20 insertions(+), 11 deletions(-) diff --git a/hrun/include/hrun/api/hrun_client.h b/hrun/include/hrun/api/hrun_client.h index c892b9a2f..1f61023ce 100644 --- a/hrun/include/hrun/api/hrun_client.h +++ b/hrun/include/hrun/api/hrun_client.h @@ -287,13 +287,21 @@ class Client : public ConfigurationManager { return alloc->Convert(p); } + /** Get the queue ID */ + HSHM_ALWAYS_INLINE + QueueId GetQueueId(const TaskStateId &id) { + if (id == HRUN_QM_CLIENT->process_queue_) { + return HRUN_QM_CLIENT->process_queue_; + } else { + return HRUN_QM_CLIENT->admin_queue_; + } + } + /** Get a queue by its ID */ HSHM_ALWAYS_INLINE MultiQueue* GetQueue(const QueueId &queue_id) { - if (queue_id == HRUN_QM_CLIENT->process_queue_) { - return queue_manager_.GetQueue(queue_id); - } - return queue_manager_.GetQueue(HRUN_QM_CLIENT->admin_queue_); + QueueId real_id = GetQueueId(queue_id); + return queue_manager_.GetQueue(real_id); } /** Detect if a task is local or remote */ diff --git a/hrun/include/hrun/task_registry/task_lib.h b/hrun/include/hrun/task_registry/task_lib.h index fe9d85a1b..ebadd7938 100644 --- a/hrun/include/hrun/task_registry/task_lib.h +++ b/hrun/include/hrun/task_registry/task_lib.h @@ -45,9 +45,10 @@ class TaskLib { TaskLib() : id_(TaskStateId::GetNull()) {} /** Emplace Constructor */ - void Init(const TaskStateId &id, const std::string &name) { + void Init(const TaskStateId &id, const QueueId &queue_id, + const std::string &name) { id_ = id; - queue_id_ = QueueId(id); + queue_id_ = queue_id; name_ = name; } @@ -125,13 +126,13 @@ typedef const char* (*get_task_lib_name_t)(void); void* alloc_state(hrun::Admin::CreateTaskStateTask *task, const char *state_name) {\ hrun::TaskState *exec = reinterpret_cast(\ new TYPE_UNWRAP(TRAIT_CLASS)());\ - exec->Init(task->id_, state_name);\ + exec->Init(task->id_, HRUN_CLIENT->GetQueueId(task->id_), state_name);\ return exec;\ }\ void* create_state(hrun::Admin::CreateTaskStateTask *task, const char *state_name) {\ hrun::TaskState *exec = reinterpret_cast(\ new TYPE_UNWRAP(TRAIT_CLASS)());\ - exec->Init(task->id_, state_name);\ + exec->Init(task->id_, HRUN_CLIENT->GetQueueId(task->id_), state_name);\ RunContext rctx(0);\ exec->Run(hrun::TaskMethod::kConstruct, task, rctx);\ return exec;\ diff --git a/hrun/include/hrun/work_orchestrator/worker.h b/hrun/include/hrun/work_orchestrator/worker.h index 148aa5786..23c39ed18 100644 --- a/hrun/include/hrun/work_orchestrator/worker.h +++ b/hrun/include/hrun/work_orchestrator/worker.h @@ -401,10 +401,10 @@ class Worker { bool was_end = HRUN_TASK_REGISTRY->task_states_.find(task->task_state_) == HRUN_TASK_REGISTRY->task_states_.end(); HILOG(kInfo, "Was end: {}", was_end); - HELOG(kFatal, "(node {}) Could not find the task state: {}", + HELOG(kError, "(node {}) Could not find the task state: {}", HRUN_CLIENT->node_id_, task->task_state_); - entry->complete_ = true; - EndTask(lane, exec, task, off); + // entry->complete_ = true; + // EndTask(lane, exec, task, off); continue; } // Get task properties From 501141cb00a24d8a101e005f62237b79e077266d Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Tue, 26 Dec 2023 17:28:04 -0600 Subject: [PATCH 19/19] Make task state a warning --- hrun/include/hrun/work_orchestrator/worker.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hrun/include/hrun/work_orchestrator/worker.h b/hrun/include/hrun/work_orchestrator/worker.h index 23c39ed18..c50a91652 100644 --- a/hrun/include/hrun/work_orchestrator/worker.h +++ b/hrun/include/hrun/work_orchestrator/worker.h @@ -401,7 +401,7 @@ class Worker { bool was_end = HRUN_TASK_REGISTRY->task_states_.find(task->task_state_) == HRUN_TASK_REGISTRY->task_states_.end(); HILOG(kInfo, "Was end: {}", was_end); - HELOG(kError, "(node {}) Could not find the task state: {}", + HELOG(kWarning, "(node {}) Could not find the task state: {}", HRUN_CLIENT->node_id_, task->task_state_); // entry->complete_ = true; // EndTask(lane, exec, task, off);