From 5183080b926b647ca707a36a94ace346b35c7a95 Mon Sep 17 00:00:00 2001 From: inikokali <56083118+inikokali@users.noreply.github.com> Date: Tue, 21 Nov 2023 15:43:25 +0200 Subject: [PATCH] Integration of MPI testing (#639) Previously, MPI testing occurred in a separate file from the main test.sh script responsible for all testing operations. We can now test MPI using the same Daphne scripts employed for gRPC testing. - Added --mpi flag to `test.sh`. - Added a test case for MPI in `DistributedTest.cpp`. - Minor changes in `Utils.h`, specifically in runProgram() so that it can be used to run MPI tests. - Renamed the old `mpi_testing` bash script to `mpi_example`. - Removed a simple `.daph` script that we used during the early development of MPI. --- mpi_testing.sh => mpi_example.sh | 2 +- scripts/examples/matrix_addition_for_mpi.daph | 33 ----------- test.sh | 7 ++- test/api/cli/Utils.h | 9 ++- test/api/cli/distributed/DistributedTest.cpp | 59 +++++++++++++++++-- 5 files changed, 69 insertions(+), 41 deletions(-) rename mpi_testing.sh => mpi_example.sh (79%) delete mode 100644 scripts/examples/matrix_addition_for_mpi.daph diff --git a/mpi_testing.sh b/mpi_example.sh similarity index 79% rename from mpi_testing.sh rename to mpi_example.sh index 93c47b716..65e5b51e7 100755 --- a/mpi_testing.sh +++ b/mpi_example.sh @@ -14,4 +14,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-DISTRIBUTED_WORKERS=unused mpirun --allow-run-as-root -np 4 ./bin/daphne --distributed --dist_backend=MPI --vec scripts/examples/matrix_addition_for_mpi.daph +mpirun --allow-run-as-root -np 4 ./bin/daphne --distributed --dist_backend=MPI --vec scripts/examples/distributed.daph diff --git a/scripts/examples/matrix_addition_for_mpi.daph b/scripts/examples/matrix_addition_for_mpi.daph deleted file mode 100644 index a4c9837b4..000000000 --- a/scripts/examples/matrix_addition_for_mpi.daph +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright 2023 The DAPHNE Consortium - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -x = 1.0; -y = 2.0; - -m1 = rand(6000, 15000, 1.0, 1000.0, 1.0, 42); -m2= rand(6000, 15000, 1.0, 1000.0, 1.0, 43); -print("---------------"); -print("part of random matrix m1"); -print(m1[1:5,1:5]); -print("part of random matrix m2"); -print(m2[1:5,1:5]); - -m3= m1 + m2; -print("m3 = m1 + m2"); -print(m3[1:5,1:5]); - -print("Hello world!"); -print("Bye!"); diff --git a/test.sh b/test.sh index 3bfbf7d1d..c5f56b67f 100755 --- a/test.sh +++ b/test.sh @@ -29,6 +29,7 @@ set -e catch2_options="" BUILD_CUDA="" BUILD_FPGAOPENCL="" +BUILD_MPI="" BUILD_DEBUG="" BUILD_DAPHNE=1 @@ -44,6 +45,10 @@ while [[ $# -gt 0 ]]; do echo using FPGAOPENCL export BUILD_FPGAOPENCL="--fpgaopencl" ;; + --mpi) + echo using MPI + export BUILD_MPI="--mpi" + ;; --debug) echo building DEBUG version export BUILD_DEBUG="--debug" @@ -59,7 +64,7 @@ done # Build tests. 
if [ $BUILD_DAPHNE -gt 0 ]; then - ./build.sh $BUILD_CUDA $BUILD_FPGAOPENCL $BUILD_DEBUG --target run_tests + ./build.sh $BUILD_CUDA $BUILD_FPGAOPENCL $BUILD_MPI $BUILD_DEBUG --target run_tests fi # Preparations for running DaphneLib (Python API) tests and MLIR codegen tests (LLVM LIT) diff --git a/test/api/cli/Utils.h b/test/api/cli/Utils.h index 1c2a29891..0f62b6fb1 100644 --- a/test/api/cli/Utils.h +++ b/test/api/cli/Utils.h @@ -120,8 +120,13 @@ int runProgram(std::stringstream & out, std::stringstream & err, const char * ex close(linkErr[1]); // Execute other program. - execl(execPath, args..., static_cast(nullptr)); - + // If execPath is a path (contains "/") use execl, otherwise use execlp. + // We need this to support "mpirun" for the MPI test cases. + if (std::string(execPath).find("/") != std::string::npos) + execl(execPath, args..., static_cast(nullptr)); + else + execlp(execPath, args..., static_cast(nullptr)); + // execl does not return, unless it failed. throw std::runtime_error("could not execute the program"); } diff --git a/test/api/cli/distributed/DistributedTest.cpp b/test/api/cli/distributed/DistributedTest.cpp index 29a4e2657..fefca370f 100644 --- a/test/api/cli/distributed/DistributedTest.cpp +++ b/test/api/cli/distributed/DistributedTest.cpp @@ -30,7 +30,7 @@ const std::string dirPath = "test/api/cli/distributed/"; -TEST_CASE("Simple distributed execution test", TAG_DISTRIBUTED) +TEST_CASE("Distributed runtime tests using gRPC", TAG_DISTRIBUTED) { auto addr1 = "0.0.0.0:50051"; auto addr2 = "0.0.0.0:50052"; @@ -41,7 +41,7 @@ TEST_CASE("Simple distributed execution test", TAG_DISTRIBUTED) assert(std::getenv("DISTRIBUTED_WORKERS") == nullptr); auto distWorkerStr = std::string(addr1) + ',' + addr2; - SECTION("Execution of distributed scripts"){ + SECTION("Execution of scripts using distributed runtime (gRPC)"){ // TODO Make these script individual DYNAMIC_SECTIONs. 
for (auto i = 1u; i < 4; ++i) { auto filename = dirPath + "distributed_" + std::to_string(i) + ".daphne"; @@ -65,7 +65,7 @@ TEST_CASE("Simple distributed execution test", TAG_DISTRIBUTED) CHECK(outLocal.str() == outDist.str()); } } - SECTION("Distributed chunked messages"){ + SECTION("Distributed chunked messages (gRPC)"){ auto filename = dirPath + "distributed_2.daphne"; @@ -114,4 +114,55 @@ TEST_CASE("Simple distributed execution test", TAG_DISTRIBUTED) kill(pid1, SIGKILL); kill(pid2, SIGKILL); wait(NULL); -} \ No newline at end of file +} + +#ifdef USE_MPI +TEST_CASE("Distributed runtime tests using MPI", TAG_DISTRIBUTED) +{ + + SECTION("Execution of scripts using distributed runtime (MPI)"){ + // TODO Make these script individual DYNAMIC_SECTIONs. + + for (auto i = 1u; i < 4; ++i) { + auto filename = dirPath + "distributed_" + std::to_string(i) + ".daphne"; + + std::stringstream outLocal; + std::stringstream errLocal; + int status = runDaphne(outLocal, errLocal, filename.c_str()); + + CHECK(errLocal.str() == ""); + REQUIRE(status == StatusCode::SUCCESS); + + std::stringstream outDist; + std::stringstream errDist; + status = runProgram(outDist, errDist, "mpirun", "--allow-run-as-root", "-np", "4", "bin/daphne", "--distributed", "--dist_backend=MPI", filename.c_str()); + + CHECK(errDist.str() == ""); + REQUIRE(status == StatusCode::SUCCESS); + + CHECK(outLocal.str() == outDist.str()); + } + } + SECTION("Distributed chunked messages (MPI)"){ + + auto filename = dirPath + "distributed_2.daphne"; + + std::stringstream outLocal; + std::stringstream errLocal; + + int status = runDaphne(outLocal, errLocal, filename.c_str()); + CHECK(errLocal.str() == ""); + REQUIRE(status == StatusCode::SUCCESS); + + std::stringstream outDist; + std::stringstream errDist; + status = runProgram(outDist, errDist, "mpirun", "--allow-run-as-root", "-np", "4", "bin/daphne", "--distributed", "--dist_backend=MPI", "--max-distr-chunk-size=100", filename.c_str()); + CHECK(errDist.str() == ""); 
+ REQUIRE(status == StatusCode::SUCCESS); + + CHECK(outLocal.str() == outDist.str()); + + } + wait(NULL); +} +#endif \ No newline at end of file