Skip to content

Commit

Permalink
Integration of MPI testing (daphne-eu#639)
Browse files Browse the repository at this point in the history
Previously, MPI testing occurred in a separate file from the main test.sh script responsible for all testing operations. We can now test MPI using the same Daphne scripts employed for gRPC testing.

- Added --mpi flag to `test.sh`.
- Added a testcase for MPI in `DistributedTest.cpp`.
- Minor changes in `Utils.h`, specifically in `runProgram()`, so that it can be used to run MPI tests.
- Renamed the old `mpi_testing` bash script to `mpi_example`.
- Removed a simple `.daph` script that we used during the early development of MPI.
  • Loading branch information
inikokali authored Nov 21, 2023
1 parent ae1d021 commit 5183080
Show file tree
Hide file tree
Showing 5 changed files with 69 additions and 41 deletions.
2 changes: 1 addition & 1 deletion mpi_testing.sh → mpi_example.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.

DISTRIBUTED_WORKERS=unused mpirun --allow-run-as-root -np 4 ./bin/daphne --distributed --dist_backend=MPI --vec scripts/examples/matrix_addition_for_mpi.daph
mpirun --allow-run-as-root -np 4 ./bin/daphne --distributed --dist_backend=MPI --vec scripts/examples/distributed.daph
33 changes: 0 additions & 33 deletions scripts/examples/matrix_addition_for_mpi.daph

This file was deleted.

7 changes: 6 additions & 1 deletion test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ set -e
catch2_options=""
BUILD_CUDA=""
BUILD_FPGAOPENCL=""
BUILD_MPI=""
BUILD_DEBUG=""
BUILD_DAPHNE=1

Expand All @@ -44,6 +45,10 @@ while [[ $# -gt 0 ]]; do
echo using FPGAOPENCL
export BUILD_FPGAOPENCL="--fpgaopencl"
;;
--mpi)
echo using MPI
export BUILD_MPI="--mpi"
;;
--debug)
echo building DEBUG version
export BUILD_DEBUG="--debug"
Expand All @@ -59,7 +64,7 @@ done

# Build tests.
if [ $BUILD_DAPHNE -gt 0 ]; then
./build.sh $BUILD_CUDA $BUILD_FPGAOPENCL $BUILD_DEBUG --target run_tests
./build.sh $BUILD_CUDA $BUILD_FPGAOPENCL $BUILD_MPI $BUILD_DEBUG --target run_tests
fi

# Preparations for running DaphneLib (Python API) tests and MLIR codegen tests (LLVM LIT)
Expand Down
9 changes: 7 additions & 2 deletions test/api/cli/Utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -120,8 +120,13 @@ int runProgram(std::stringstream & out, std::stringstream & err, const char * ex
close(linkErr[1]);

// Execute other program.
execl(execPath, args..., static_cast<char *>(nullptr));

// If execPath is a path (contains "/") use execl, otherwise use execlp.
// We need this to support "mpirun" for the MPI test cases.
if (std::string(execPath).find("/") != std::string::npos)
execl(execPath, args..., static_cast<char *>(nullptr));
else
execlp(execPath, args..., static_cast<char *>(nullptr));

// execl does not return, unless it failed.
throw std::runtime_error("could not execute the program");
}
Expand Down
59 changes: 55 additions & 4 deletions test/api/cli/distributed/DistributedTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@

const std::string dirPath = "test/api/cli/distributed/";

TEST_CASE("Simple distributed execution test", TAG_DISTRIBUTED)
TEST_CASE("Distributed runtime tests using gRPC", TAG_DISTRIBUTED)
{
auto addr1 = "0.0.0.0:50051";
auto addr2 = "0.0.0.0:50052";
Expand All @@ -41,7 +41,7 @@ TEST_CASE("Simple distributed execution test", TAG_DISTRIBUTED)
assert(std::getenv("DISTRIBUTED_WORKERS") == nullptr);
auto distWorkerStr = std::string(addr1) + ',' + addr2;

SECTION("Execution of distributed scripts"){
SECTION("Execution of scripts using distributed runtime (gRPC)"){
// TODO Make these script individual DYNAMIC_SECTIONs.
for (auto i = 1u; i < 4; ++i) {
auto filename = dirPath + "distributed_" + std::to_string(i) + ".daphne";
Expand All @@ -65,7 +65,7 @@ TEST_CASE("Simple distributed execution test", TAG_DISTRIBUTED)
CHECK(outLocal.str() == outDist.str());
}
}
SECTION("Distributed chunked messages"){
SECTION("Distributed chunked messages (gRPC)"){

auto filename = dirPath + "distributed_2.daphne";

Expand Down Expand Up @@ -114,4 +114,55 @@ TEST_CASE("Simple distributed execution test", TAG_DISTRIBUTED)
kill(pid1, SIGKILL);
kill(pid2, SIGKILL);
wait(NULL);
}
}

#ifdef USE_MPI
// Checks that the distributed test scripts print the same output when they are
// executed through the MPI backend of the distributed runtime (launched via
// `mpirun -np 4 bin/daphne ...`) as when they are executed locally.
TEST_CASE("Distributed runtime tests using MPI", TAG_DISTRIBUTED)
{

    SECTION("Execution of scripts using distributed runtime (MPI)"){
        // TODO Make these script individual DYNAMIC_SECTIONs.

        for (auto i = 1u; i < 4; ++i) {
            auto filename = dirPath + "distributed_" + std::to_string(i) + ".daphne";

            // Reference result: plain local execution of the script.
            std::stringstream outLocal;
            std::stringstream errLocal;
            int status = runDaphne(outLocal, errLocal, filename.c_str());

            CHECK(errLocal.str() == "");
            REQUIRE(status == StatusCode::SUCCESS);

            // Same script, executed through mpirun with the MPI backend.
            // runProgram() hands its variadic arguments directly to
            // execl()/execlp(), so the first of them becomes argv[0] of the
            // started program. "mpirun" is therefore passed twice: once as the
            // executable to look up and once as argv[0]. Without the second
            // one, "--allow-run-as-root" would end up as argv[0] and would not
            // be parsed as an option by mpirun.
            std::stringstream outDist;
            std::stringstream errDist;
            status = runProgram(outDist, errDist, "mpirun", "mpirun", "--allow-run-as-root", "-np", "4", "bin/daphne", "--distributed", "--dist_backend=MPI", filename.c_str());

            CHECK(errDist.str() == "");
            REQUIRE(status == StatusCode::SUCCESS);

            // Local and distributed execution must print exactly the same.
            CHECK(outLocal.str() == outDist.str());
        }
    }
    SECTION("Distributed chunked messages (MPI)"){

        auto filename = dirPath + "distributed_2.daphne";

        // Reference result: plain local execution of the script.
        std::stringstream outLocal;
        std::stringstream errLocal;

        int status = runDaphne(outLocal, errLocal, filename.c_str());
        CHECK(errLocal.str() == "");
        REQUIRE(status == StatusCode::SUCCESS);

        // Distributed run with an explicit --max-distr-chunk-size.
        // As above, "mpirun" is repeated so that it is argv[0] and
        // "--allow-run-as-root" is seen by mpirun as a real option.
        std::stringstream outDist;
        std::stringstream errDist;
        status = runProgram(outDist, errDist, "mpirun", "mpirun", "--allow-run-as-root", "-np", "4", "bin/daphne", "--distributed", "--dist_backend=MPI", "--max-distr-chunk-size=100", filename.c_str());
        CHECK(errDist.str() == "");
        REQUIRE(status == StatusCode::SUCCESS);

        CHECK(outLocal.str() == outDist.str());

    }
    // NOTE(review): this test case does not start any long-running child
    // processes itself; this call looks like a leftover from the gRPC test
    // case, which kills its workers before waiting. Presumably it returns
    // immediately here -- confirm against runProgram() and consider removing.
    wait(NULL);
}
#endif

0 comments on commit 5183080

Please sign in to comment.