diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml index b1ec41197e..87caed2573 100644 --- a/.github/workflows/documentation.yml +++ b/.github/workflows/documentation.yml @@ -39,10 +39,10 @@ jobs: with: ref: gh-pages clean: false - + - name: Move generated files where git can see them run: cp -rp docs/html/* . - + - name: Prevent generated docs dir from being committed and overwriting on the next run. run: rm -Rf docs/html diff --git a/include/core/Partition_Data.hpp b/include/core/Partition_Data.hpp new file mode 100644 index 0000000000..992edfab9b --- /dev/null +++ b/include/core/Partition_Data.hpp @@ -0,0 +1,13 @@ +#ifndef PARTITION_DATA_H +#define PARTITION_DATA_H + +struct PartitionData +{ + using Tuple = std::tuple; + int mpi_world_rank; + std::unordered_set catchment_ids; + std::unordered_set nexus_ids; + std::vector remote_connections; +}; + +#endif //PARTITION_DATA_H diff --git a/include/core/Partition_One.hpp b/include/core/Partition_One.hpp new file mode 100644 index 0000000000..293160837a --- /dev/null +++ b/include/core/Partition_One.hpp @@ -0,0 +1,37 @@ +#ifndef PARTITION_ONE_H +#define PARTITION_ONE_H + +#include +#include +#include +#include +#include +#include +#include "features/Features.hpp" +#include +#include "Partition_Data.hpp" + +class Partition_One { + + public: + /** + * The function that parses geojson::GeoJSON data and build unordered sets of catchment_ids and nexus_ids + + * @param catchment_collection the geojson::GeoJSON data containing all the necessary hydrofabric info + */ + void generate_partition(geojson::GeoJSON& catchment_collection) + { + for(auto& feature: *catchment_collection) + { + std::string cat_id = feature->get_id(); + partition_data.catchment_ids.emplace(cat_id); + std::string nex_id = feature->get_property("toid").as_string(); + partition_data.nexus_ids.emplace(nex_id); + } + partition_data.mpi_world_rank = 0; + } + + PartitionData partition_data; +}; + +#endif // PARTITION_ONE_H 
diff --git a/include/core/Partition_Parser.hpp b/include/core/Partition_Parser.hpp index 9c0e740bfb..cc29a40377 100644 --- a/include/core/Partition_Parser.hpp +++ b/include/core/Partition_Parser.hpp @@ -20,18 +20,7 @@ #include "features/Features.hpp" #include #include "JSONProperty.hpp" - -using Tuple = std::tuple; - -//This struct is moved from private section to here so that the unit test function can access it -struct PartitionData -{ - int mpi_world_rank; - std::unordered_set catchment_ids; - std::unordered_set nexus_ids; - std::vector remote_connections; -}; - +#include "Partition_Data.hpp" class Partitions_Parser { @@ -63,8 +52,8 @@ class Partitions_Parser { std::string remote_nex_id; std::string remote_cat_id; std::string direction; - Tuple tmp_tuple; - std::vector remote_conn_vec; + PartitionData::Tuple tmp_tuple; + std::vector remote_conn_vec; int part_counter = 0; for(auto &partition: tree.get_child("partitions")) { //Get partition id diff --git a/src/NGen.cpp b/src/NGen.cpp index 74817737db..f68ba15a9d 100644 --- a/src/NGen.cpp +++ b/src/NGen.cpp @@ -52,6 +52,8 @@ int mpi_rank = 0; #include "core/Partition_Parser.hpp" #include +#include "core/Partition_One.hpp" + std::string PARTITION_PATH = ""; int mpi_num_procs; #endif // NGEN_MPI_ACTIVE @@ -249,10 +251,16 @@ int main(int argc, char *argv[]) { REALIZATION_CONFIG_PATH = argv[5]; #ifdef NGEN_MPI_ACTIVE + + // Initialize MPI + MPI_Init(NULL, NULL); + MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank); + MPI_Comm_size(MPI_COMM_WORLD, &mpi_num_procs); + if (argc >= 7) { PARTITION_PATH = argv[6]; } - else { + else if (mpi_num_procs > 1) { std::cout << "Missing required argument for partition file path."
<< std::endl; exit(-1); } @@ -261,17 +269,11 @@ int main(int argc, char *argv[]) { if (strcmp(argv[7], MPI_HF_SUB_CLI_FLAG) == 0) { is_subdivided_hydrofabric_wanted = true; } - else { + else if (mpi_num_procs > 1) { std::cout << "Unexpected arg '" << argv[7] << "'; try " << MPI_HF_SUB_CLI_FLAG << std::endl; exit(-1); } } - - // Initalize MPI - MPI_Init(NULL, NULL); - MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank); - MPI_Comm_size(MPI_COMM_WORLD, &mpi_num_procs); - #endif // NGEN_MPI_ACTIVE #ifdef WRITE_PID_FILE_FOR_GDB_SERVER @@ -336,20 +338,23 @@ int main(int argc, char *argv[]) { std::cout << "Building Nexus collection" << std::endl; #ifdef NGEN_MPI_ACTIVE - Partitions_Parser partition_parser(PARTITION_PATH); - // TODO: add something here to make sure this step worked for every rank, and maybe to checksum the file - partition_parser.parse_partition_file(); - - std::vector &partitions = partition_parser.partition_ranks; - PartitionData &local_data = partitions[mpi_rank]; - if (!nexus_subset_ids.empty()) { - std::cerr << "Warning: CLI provided nexus subset will be ignored when using partition config"; - } - if (!catchment_subset_ids.empty()) { - std::cerr << "Warning: CLI provided catchment subset will be ignored when using partition config"; + PartitionData local_data; + if (mpi_num_procs > 1) { + Partitions_Parser partition_parser(PARTITION_PATH); + // TODO: add something here to make sure this step worked for every rank, and maybe to checksum the file + partition_parser.parse_partition_file(); + + std::vector &partitions = partition_parser.partition_ranks; + local_data = std::move(partitions[mpi_rank]); + if (!nexus_subset_ids.empty()) { + std::cerr << "Warning: CLI provided nexus subset will be ignored when using partition config"; + } + if (!catchment_subset_ids.empty()) { + std::cerr << "Warning: CLI provided catchment subset will be ignored when using partition config"; + } + nexus_subset_ids = std::vector(local_data.nexus_ids.begin(), local_data.nexus_ids.end()); 
+ catchment_subset_ids = std::vector(local_data.catchment_ids.begin(), local_data.catchment_ids.end()); } - nexus_subset_ids = std::vector(local_data.nexus_ids.begin(), local_data.nexus_ids.end()); - catchment_subset_ids = std::vector(local_data.catchment_ids.begin(), local_data.catchment_ids.end()); #endif // NGEN_MPI_ACTIVE // TODO: Instead of iterating through a collection of FeatureBase objects mapping to nexi, we instead want to iterate through HY_HydroLocation objects @@ -379,14 +384,14 @@ int main(int argc, char *argv[]) { for(auto& feature: *catchment_collection) { - //feature->set_id(feature->get_property("ID").as_string()); + //feature->set_id(feature->get_property("id").as_string()); nexus_collection->add_feature(feature); - //std::cout<<"Catchment "<get_id()<<" -> Nexus "<get_property("toID").as_string()<get_id()<<" -> Nexus "<get_property("toid").as_string()<update_ids("id"); - std::cout<<"Initializing formulations\n"; + std::cout<<"Initializing formulations" << std::endl; std::shared_ptr manager = std::make_shared(REALIZATION_CONFIG_PATH); manager->read(catchment_collection, utils::getStdOut()); @@ -406,10 +411,17 @@ int main(int argc, char *argv[]) { } } #endif //NGEN_ROUTING_ACTIVE - std::cout<<"Building Feature Index\n"; + std::cout<<"Building Feature Index" <link_features_from_property(nullptr, &link_key); + #ifdef NGEN_MPI_ACTIVE + //mpirun with one processor without partition file + if (mpi_num_procs == 1) { + Partition_One partition_one; + partition_one.generate_partition(catchment_collection); + local_data = std::move(partition_one.partition_data); + } hy_features::HY_Features_MPI features = hy_features::HY_Features_MPI(local_data, nexus_collection, manager, mpi_rank, mpi_num_procs); #else hy_features::HY_Features features = hy_features::HY_Features(nexus_collection, manager); @@ -425,7 +437,11 @@ int main(int argc, char *argv[]) { //Still hacking nexus output for the moment for(const auto& id : features.nexuses()) { #ifdef NGEN_MPI_ACTIVE - 
if (!features.is_remote_sender_nexus(id)) { + if (mpi_num_procs > 1) { + if (!features.is_remote_sender_nexus(id)) { + nexus_outfiles[id].open(manager->get_output_root() + id + "_output.csv", std::ios::trunc); + } + } else { nexus_outfiles[id].open(manager->get_output_root() + id + "_output.csv", std::ios::trunc); } #else diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index a6aed84bf0..ba8f0624d8 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -229,6 +229,20 @@ ngen_add_test( # NGEN_WITH_MPI ) +########################## Partition_One Tests +ngen_add_test( + test_partition_one + OBJECTS + utils/Partition_One_Test.cpp + LIBRARIES + gmock + NGen::core + NGen::geojson + NGen::geopackage + REQUIRES + NGEN_WITH_SQLITE +) + ########################## MultiLayer Tests ngen_add_test( test_multilayer @@ -452,7 +466,6 @@ ngen_add_test( NGen::logging NGen::ngen_bmi testbmicppmodel - ) # Discover for test_all diff --git a/test/utils/Partition_One_Test.cpp b/test/utils/Partition_One_Test.cpp new file mode 100644 index 0000000000..2e3e1f747a --- /dev/null +++ b/test/utils/Partition_One_Test.cpp @@ -0,0 +1,190 @@ +#include "gtest/gtest.h" +#include "gmock/gmock.h" +#include + +#include +#include +#include + +#include "core/Partition_One.hpp" +#include "FileChecker.h" + + +class PartitionOneTest: public ::testing::Test { + + protected: + + std::vector hydro_fabric_paths; + + std::string catchmentDataFile; + geojson::GeoJSON catchment_collection, nexus_collection; + + std::vector catchment_subset_ids; + std::vector nexus_subset_ids; + + std::unordered_set catchment_ids; + std::unordered_set nexus_ids; + + Partition_One partition_one; + PartitionData partition_data; + + PartitionOneTest() {} + + ~PartitionOneTest() override {} + + std::string file_search(const std::vector &parent_dir_options, const std::string& file_basename) + { + // Build vector of names by building combinations of the path and basename options + std::vector name_combinations; + + // Build so 
that all path names are tried for given basename before trying a different basename option + for (auto & path_option : parent_dir_options) + name_combinations.push_back(path_option + file_basename); + + return utils::FileChecker::find_first_readable(name_combinations); + } + + void read_file_generate_partition_data() + { + const std::string file_path = file_search(hydro_fabric_paths, "catchment_data.geojson"); + catchment_collection = geojson::read(file_path, catchment_subset_ids); + + for(auto& feature: *catchment_collection) + { + std::string cat_id = feature->get_id(); + partition_data.catchment_ids.emplace(cat_id); + std::string nex_id = feature->get_property("toid").as_string(); + partition_data.nexus_ids.emplace(nex_id); + } + } + + void read_file_nexus_data() + { + const std::string file_path = file_search(hydro_fabric_paths, "nexus_data.geojson"); + nexus_collection = geojson::read(file_path, nexus_subset_ids); + } + + void SetUp() override; + + void TearDown() override; + + void setupArbitraryExampleCase(); + +}; + +void PartitionOneTest::SetUp() { + setupArbitraryExampleCase(); +} + +void PartitionOneTest::TearDown() { + +} + +void PartitionOneTest::setupArbitraryExampleCase() { + hydro_fabric_paths = { + "data/", + "./data/", + "../data/", + "../../data/", + + }; +} + +TEST_F(PartitionOneTest, TestPartitionData_1a) +{ + read_file_generate_partition_data(); + + partition_one.generate_partition(catchment_collection); + PartitionData data_struct = partition_one.partition_data; + catchment_ids = data_struct.catchment_ids; + + //check catchment partition + std::vector cat_id_vec; + // convert unordered_set to vector + for (const auto& id: catchment_ids) { + cat_id_vec.push_back(id); + } + + //sort ids + std::sort(cat_id_vec.begin(), cat_id_vec.end()); + //create set of unique ids + std::set unique(cat_id_vec.begin(), cat_id_vec.end()); + std::set duplicates; + //use set difference to identify all duplicates + std::set_difference(cat_id_vec.begin(), 
cat_id_vec.end(), unique.begin(), unique.end(), std::inserter(duplicates, duplicates.end())); + + for( auto& id: duplicates){ + std::cout << "duplicates string set contains " << id << std::endl; + } + + //process the original read in data + std::vector input_cat_ids; + for(auto& feature: *catchment_collection) + { + std::string cat_id = feature->get_id(); + input_cat_ids.push_back(cat_id); + } + std::sort(input_cat_ids.begin(), input_cat_ids.end()); + + for (int i = 0; i < input_cat_ids.size(); ++i) { + if (input_cat_ids[i] != cat_id_vec[i]) { + std::cout << "Input cat_id: " << input_cat_ids[i] << " differs from partition cat_id: " << cat_id_vec[i] << std::endl; + } + } + + //get input number of catchments + int num_catchments = catchment_collection->get_size(); + + ASSERT_EQ(catchment_ids.size(), num_catchments); + ASSERT_EQ(duplicates.size(), 0); +} + +TEST_F(PartitionOneTest, TestPartitionData_1b) +{ + read_file_generate_partition_data(); + read_file_nexus_data(); + + partition_one.generate_partition(catchment_collection); + PartitionData data_struct = partition_one.partition_data; + nexus_ids = data_struct.nexus_ids; + + //check nexus partition + std::vector nex_id_vec; + //convert unordered_set to vector + for (const auto& id: nexus_ids) { + nex_id_vec.push_back(id); + } + + //sort ids + std::sort(nex_id_vec.begin(), nex_id_vec.end()); + //create set of unique ids + std::set unique(nex_id_vec.begin(), nex_id_vec.end()); + std::set duplicates; + //use set difference to identify all duplicates + std::set_difference(nex_id_vec.begin(), nex_id_vec.end(), unique.begin(), unique.end(), std::inserter(duplicates, duplicates.end())); + + for( auto& id: duplicates){ + std::cout << "duplicates string set contains " << id << std::endl; + } + + //process the original read in data + std::vector input_nex_ids; + for(auto& feature: *nexus_collection) + { + std::string nex_id = feature->get_id(); + input_nex_ids.push_back(nex_id); + } + std::sort(input_nex_ids.begin(),
input_nex_ids.end()); + + for (int i = 0; i < input_nex_ids.size(); ++i) { + if (input_nex_ids[i] != nex_id_vec[i]) { + std::cout << "Input nex_id: " << input_nex_ids[i] << " differs from partition nex_id: " << nex_id_vec[i] << std::endl; + } + } + + //get input number of nexus + int num_nexus = nexus_collection->get_size(); + + ASSERT_EQ(nexus_ids.size(), num_nexus); + ASSERT_EQ(duplicates.size(), 0); +}