From 39f405b2002071f4ac8901290d394646ee4f7ebe Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Sat, 30 Dec 2023 06:10:54 -0600 Subject: [PATCH 1/3] Flush globally --- hrun/src/hrun_stop_runtime.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hrun/src/hrun_stop_runtime.cc b/hrun/src/hrun_stop_runtime.cc index f5cb33d02..00228ef2b 100644 --- a/hrun/src/hrun_stop_runtime.cc +++ b/hrun/src/hrun_stop_runtime.cc @@ -14,5 +14,5 @@ int main() { TRANSPARENT_HRUN(); - HRUN_ADMIN->StopRuntimeRoot(hrun::DomainId::GetLocal()); + HRUN_ADMIN->StopRuntimeRoot(hrun::DomainId::GetGlobal()); } \ No newline at end of file From f0b6f2a82957b5425b96b87410654607492ba4b5 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Sat, 30 Dec 2023 06:31:09 -0600 Subject: [PATCH 2/3] Graceful terminator function --- hrun/include/hrun/hrun_types.h | 28 ++++++++++++++++--- hrun/src/hrun_runtime.cc | 12 ++++++-- hrun/src/hrun_stop_runtime.cc | 2 +- .../include/hrun_admin/hrun_admin.h | 7 +++-- .../include/hrun_admin/hrun_admin_tasks.h | 2 +- .../include/remote_queue/remote_queue.h | 5 ++++ test/unit/ipc/test_finalize.cc | 2 +- 7 files changed, 46 insertions(+), 12 deletions(-) diff --git a/hrun/include/hrun/hrun_types.h b/hrun/include/hrun/hrun_types.h index 7ac5251fc..d167add46 100644 --- a/hrun/include/hrun/hrun_types.h +++ b/hrun/include/hrun/hrun_types.h @@ -90,10 +90,16 @@ enum class HrunMode { struct DomainId { bitfield32_t flags_; /**< Flags indicating how to interpret id */ u32 id_; /**< The domain id, 0 is NULL */ - DOMAIN_FLAG_T kLocal = BIT_OPT(u32, 0); /**< Include local node in scheduling decision */ - DOMAIN_FLAG_T kGlobal = BIT_OPT(u32, 1); /**< Use all nodes in scheduling decision */ - DOMAIN_FLAG_T kSet = BIT_OPT(u32, 2); /**< ID represents node set ID, not a single node */ - DOMAIN_FLAG_T kNode = BIT_OPT(u32, 3); /**< ID represents a specific node */ + DOMAIN_FLAG_T kLocal = + BIT_OPT(u32, 0); /**< Use local node in scheduling decision */ + DOMAIN_FLAG_T kGlobal = + BIT_OPT(u32, 1); /**< Use all nodes in scheduling decision */ + DOMAIN_FLAG_T kNoLocal = + BIT_OPT(u32, 4); /**< Don't use local node in scheduling decision */ + DOMAIN_FLAG_T kSet = + BIT_OPT(u32, 2); /**< ID represents node set ID, not a single node */ + DOMAIN_FLAG_T kNode = + BIT_OPT(u32, 3); /**< ID represents a specific node */ /** Serialize domain id */ template @@ -170,6 +176,20 @@ struct DomainId { return id; } + /** Domain doesn't include this node */ + bool IsNoLocal() const { + return flags_.Any(kNoLocal); + } + + /** DomainId representing all nodes, except this one */ + HSHM_ALWAYS_INLINE + static DomainId GetGlobalMinusLocal() { + DomainId id; + id.id_ = 0; + id.flags_.SetBits(kGlobal | kNoLocal); + return id; + } + /** DomainId represents a named node set */ HSHM_ALWAYS_INLINE bool IsSet() const { diff --git a/hrun/src/hrun_runtime.cc b/hrun/src/hrun_runtime.cc index 8af7cc93d..b738628ba 100644 --- a/hrun/src/hrun_runtime.cc +++ b/hrun/src/hrun_runtime.cc @@ -168,8 +168,16 @@ Runtime::ResolveDomainId(const DomainId &domain_id) { std::vector ids; if (domain_id.IsGlobal()) { ids.reserve(rpc_.hosts_.size()); - for (HostInfo &host_info : rpc_.hosts_) { - ids.push_back(DomainId::GetNode(host_info.node_id_)); + if (domain_id.IsNoLocal()) { + for (HostInfo &host_info : rpc_.hosts_) { + if (host_info.node_id_ != rpc_.node_id_) { + ids.push_back(DomainId::GetNode(host_info.node_id_)); + } + } + } else { + for (HostInfo &host_info : rpc_.hosts_) { + ids.push_back(DomainId::GetNode(host_info.node_id_)); + } } } else if (domain_id.IsNode()) { ids.reserve(1); diff --git a/hrun/src/hrun_stop_runtime.cc b/hrun/src/hrun_stop_runtime.cc index 00228ef2b..431edb6fb 100644 --- a/hrun/src/hrun_stop_runtime.cc +++ b/hrun/src/hrun_stop_runtime.cc @@ -14,5 +14,5 @@ int main() { TRANSPARENT_HRUN(); - HRUN_ADMIN->StopRuntimeRoot(hrun::DomainId::GetGlobal()); + HRUN_ADMIN->StopRuntimeRoot(); } \ No newline at end of file diff --git a/hrun/tasks_required/hrun_admin/include/hrun_admin/hrun_admin.h b/hrun/tasks_required/hrun_admin/include/hrun_admin/hrun_admin.h index 5af7801de..ad05c76db 100644 --- a/hrun/tasks_required/hrun_admin/include/hrun_admin/hrun_admin.h +++ b/hrun/tasks_required/hrun_admin/include/hrun_admin/hrun_admin.h @@ -177,9 +177,10 @@ class Client : public TaskLibClient { task, task_node, domain_id); } HRUN_TASK_NODE_ADMIN_ROOT(StopRuntime); - void StopRuntimeRoot(const DomainId &domain_id) { - FlushRoot(domain_id); - AsyncStopRuntimeRoot(domain_id); + void StopRuntimeRoot() { + FlushRoot(DomainId::GetGlobal()); + AsyncStopRuntimeRoot(DomainId::GetGlobalMinusLocal()); + AsyncStopRuntimeRoot(DomainId::GetLocal()); } /** Set work orchestrator queue policy */ diff --git a/hrun/tasks_required/hrun_admin/include/hrun_admin/hrun_admin_tasks.h b/hrun/tasks_required/hrun_admin/include/hrun_admin/hrun_admin_tasks.h index 48e208849..1d3af4b68 100644 --- a/hrun/tasks_required/hrun_admin/include/hrun_admin/hrun_admin_tasks.h +++ b/hrun/tasks_required/hrun_admin/include/hrun_admin/hrun_admin_tasks.h @@ -340,7 +340,7 @@ struct StopRuntimeTask : public Task, TaskFlags { prio_ = TaskPrio::kAdmin; task_state_ = HRUN_QM_CLIENT->admin_task_state_; method_ = Method::kStopRuntime; - task_flags_.SetBits(TASK_FIRE_AND_FORGET); + task_flags_.SetBits(TASK_FIRE_AND_FORGET | TASK_FLUSH); domain_id_ = domain_id; } diff --git a/hrun/tasks_required/remote_queue/include/remote_queue/remote_queue.h b/hrun/tasks_required/remote_queue/include/remote_queue/remote_queue.h index 21dd89ddf..358643f95 100644 --- a/hrun/tasks_required/remote_queue/include/remote_queue/remote_queue.h +++ b/hrun/tasks_required/remote_queue/include/remote_queue/remote_queue.h @@ -58,6 +58,11 @@ class Client : public TaskLibClient { void Disperse(Task *orig_task, TaskState *exec, std::vector &domain_ids) { + if (domain_ids.size() == 0) { + orig_task->SetModuleComplete(); + return; + } + // Serialize task + create the wait task orig_task->UnsetStarted(); BinaryOutputArchive ar(DomainId::GetNode(HRUN_CLIENT->node_id_)); diff --git a/test/unit/ipc/test_finalize.cc b/test/unit/ipc/test_finalize.cc index 4e3ece644..45cfb9284 100644 --- a/test/unit/ipc/test_finalize.cc +++ b/test/unit/ipc/test_finalize.cc @@ -15,5 +15,5 @@ #include "hrun_admin/hrun_admin.h" TEST_CASE("TestFinalize") { - HRUN_ADMIN->AsyncStopRuntimeRoot(hrun::DomainId::GetGlobal()); + HRUN_ADMIN->StopRuntimeRoot(); } From 317c8b2a279a6f5195c3f09a625f680297247310 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Sat, 30 Dec 2023 06:36:33 -0600 Subject: [PATCH 3/3] Don't join and free xstream --- hrun/src/work_orchestrator.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hrun/src/work_orchestrator.cc b/hrun/src/work_orchestrator.cc index 60764b184..310f16794 100644 --- a/hrun/src/work_orchestrator.cc +++ b/hrun/src/work_orchestrator.cc @@ -88,8 +88,8 @@ void WorkOrchestrator::Join() { kill_requested_.store(true); for (std::unique_ptr &worker : workers_) { worker->thread_->join(); - ABT_xstream_join(xstream_); - ABT_xstream_free(&xstream_); +// ABT_xstream_join(xstream_); +// ABT_xstream_free(&xstream_); } }