From 3a212f9fac5249dd4201d8dc2a40a3ce2b1152f7 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Mon, 6 Nov 2023 20:02:27 -0600 Subject: [PATCH 001/114] Print the placement decision --- tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc index e3d486f91..9e31bdc67 100644 --- a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc +++ b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc @@ -46,6 +46,7 @@ class Server : public TaskLib { * ===================================*/ std::vector target_tasks_; std::vector targets_; + bdev::Client *fallback_target_; std::unordered_map target_map_; Client blob_mdm_; bucket_mdm::Client bkt_mdm_; @@ -94,6 +95,7 @@ class Server : public TaskLib { [](const bdev::Client &a, const bdev::Client &b) { return a.bandwidth_ > b.bandwidth_; }); + fallback_target_ = &targets_.back(); blob_mdm_.Init(id_); HILOG(kInfo, "(node {}) Created Blob MDM", HRUN_CLIENT->node_id_); task->SetModuleComplete(); @@ -292,6 +294,7 @@ class Server : public TaskLib { // Allocate blob buffers for (PlacementSchema &schema : schema_vec) { + schema.plcmnts_.emplace_back(0, fallback_target_->id_); for (size_t sub_idx = 0; sub_idx < schema.plcmnts_.size(); ++sub_idx) { SubPlacement &placement = schema.plcmnts_[sub_idx]; TargetInfo &bdev = *target_map_[placement.tid_]; @@ -301,11 +304,13 @@ class Server : public TaskLib { placement.size_, blob_info.buffers_); alloc_task->Wait(task); + HILOG(kInfo, "Placing {}/{} bytes in target {} of bw {}", + alloc_task->alloc_size_, task->data_size_, placement.tid_, bdev.bandwidth_) if (alloc_task->alloc_size_ < alloc_task->size_) { - // SubPlacement &next_placement = schema.plcmnts_[sub_idx + 1]; - // size_t diff = alloc_task->size_ - alloc_task->alloc_size_; - // next_placement.size_ += diff; - HELOG(kFatal, "Ran outta space in this tier -- will fix soon") + SubPlacement &next_placement = schema.plcmnts_[sub_idx + 1]; + size_t diff = alloc_task->size_ - alloc_task->alloc_size_; + next_placement.size_ += diff; + HILOG(kInfo, "Delegating more space to the next task ({} bytes)", diff); } // bdev.monitor_task_->rem_cap_ -= alloc_task->alloc_size_; HRUN_CLIENT->DelTask(alloc_task); From d3326db2f5eca40580605b6ce723a5810d5c867a Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Mon, 6 Nov 2023 20:27:57 -0600 Subject: [PATCH 002/114] Add more to info log --- include/hermes/dpe/minimize_io_time.h | 1 + tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/include/hermes/dpe/minimize_io_time.h b/include/hermes/dpe/minimize_io_time.h index 7c09b6c96..e4a7c8b1d 100644 --- a/include/hermes/dpe/minimize_io_time.h +++ b/include/hermes/dpe/minimize_io_time.h @@ -44,6 +44,7 @@ class MinimizeIoTime : public Dpe { size_t rem_cap = target.GetRemCap(); if (target.score_ > score || rem_cap < blob_size) { // TODO(llogan): add other considerations of this Dpe + HILOG(kInfo, "Not enough space or score in {} of bw {}", target.id_, target.bandwidth_) continue; } if (ctx.blob_score_ == -1) { diff --git a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc index 9e31bdc67..68ab20278 100644 --- a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc +++ b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc @@ -95,6 +95,10 @@ class Server : public TaskLib { [](const bdev::Client &a, const bdev::Client &b) { return a.bandwidth_ > b.bandwidth_; }); + for 
(bdev::Client &client : targets_) { + HILOG(kInfo, "(node {}) Target {} has bw {}", HRUN_CLIENT->node_id_, + client.id_, client.bandwidth_); + } fallback_target_ = &targets_.back(); blob_mdm_.Init(id_); HILOG(kInfo, "(node {}) Created Blob MDM", HRUN_CLIENT->node_id_); From 7a4cc51884e8856c3b2c75584ada90cda9dcd82c Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Mon, 6 Nov 2023 20:44:11 -0600 Subject: [PATCH 003/114] Print statement --- tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc index 68ab20278..d8162d98f 100644 --- a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc +++ b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc @@ -308,8 +308,10 @@ class Server : public TaskLib { placement.size_, blob_info.buffers_); alloc_task->Wait(task); - HILOG(kInfo, "Placing {}/{} bytes in target {} of bw {}", - alloc_task->alloc_size_, task->data_size_, placement.tid_, bdev.bandwidth_) + HILOG(kInfo, "(node {}) Placing {}/{} bytes in target {} of bw {}", + HRUN_CLIENT->node_id_, + alloc_task->alloc_size_, task->data_size_, + placement.tid_, bdev.bandwidth_) if (alloc_task->alloc_size_ < alloc_task->size_) { SubPlacement &next_placement = schema.plcmnts_[sub_idx + 1]; size_t diff = alloc_task->size_ - alloc_task->alloc_size_; From 2e06c1dc392d4e2c85cf552d182aba08d525546f Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Mon, 6 Nov 2023 21:47:36 -0600 Subject: [PATCH 004/114] Map the clients properly --- tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc index d8162d98f..4e34a490b 100644 --- a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc +++ b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc @@ -89,13 +89,13 @@ class Server : public TaskLib { tgt_task->Wait(task); bdev::Client &client = targets_[i]; client.AsyncCreateComplete(tgt_task); - target_map_.emplace(client.id_, &client); } std::sort(targets_.begin(), targets_.end(), [](const bdev::Client &a, const bdev::Client &b) { return a.bandwidth_ > b.bandwidth_; }); for (bdev::Client &client : targets_) { + target_map_.emplace(client.id_, &client); HILOG(kInfo, "(node {}) Target {} has bw {}", HRUN_CLIENT->node_id_, client.id_, client.bandwidth_); } From 0629ec0c54c1ffa72f93329722b52f1ccea70b6e Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Mon, 6 Nov 2023 22:26:00 -0600 Subject: [PATCH 005/114] Try adding tag to blob lane hash --- .../include/hermes_blob_mdm/hermes_blob_mdm.h | 4 ++-- .../hermes_blob_mdm/hermes_blob_mdm_tasks.h | 16 +++++++++++----- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm.h b/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm.h index 5f4a4acf3..3443a511e 100644 --- a/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm.h +++ b/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm.h @@ -94,7 +94,7 @@ class Client : public TaskLibClient { const TaskNode &task_node, TagId tag_id, const hshm::charbuf &blob_name) { - u32 hash = std::hash{}(blob_name); + u32 hash = HashBlobName(tag_id, blob_name); HRUN_CLIENT->ConstructTask( task, task_node, DomainId::GetNode(HASH_TO_NODE_ID(hash)), id_, tag_id, blob_name); @@ -252,7 +252,7 @@ class Client : public TaskLibClient { const TaskNode &task_node, const TagId &tag_id, const 
hshm::charbuf &blob_name) { - u32 hash = std::hash{}(blob_name); + u32 hash = HashBlobName(tag_id, blob_name); HRUN_CLIENT->ConstructTask( task, task_node, DomainId::GetNode(HASH_TO_NODE_ID(hash)), id_, tag_id, blob_name); diff --git a/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h b/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h index d25b251d5..90fb30e6d 100644 --- a/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h +++ b/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h @@ -23,6 +23,12 @@ using hrun::Task; using hrun::TaskFlags; using hrun::DataTransfer; +static inline u32 HashBlobName(const TagId &tag_id, const hshm::charbuf &blob_name) { + u32 h2 = std::hash{}(tag_id); + u32 h1 = std::hash{}(blob_name); + return std::hash{}(h1 ^ h2); +} + /** Phases of the construct task */ using hrun::Admin::CreateTaskStatePhase; class ConstructTaskPhase : public CreateTaskStatePhase { @@ -172,7 +178,7 @@ struct GetOrCreateBlobIdTask : public Task, TaskFlags { const hshm::charbuf &blob_name) : Task(alloc) { // Initialize task task_node_ = task_node; - lane_hash_ = std::hash{}(blob_name); + lane_hash_ = HashBlobName(tag_id, blob_name); prio_ = TaskPrio::kLowLatency; task_state_ = state_id; method_ = Method::kGetOrCreateBlobId; @@ -270,7 +276,7 @@ struct PutBlobTask : public Task, TaskFlags lane_hash_ = blob_id.hash_; domain_id_ = domain_id; } else { - lane_hash_ = std::hash{}(blob_name); + lane_hash_ = HashBlobName(tag_id, blob_name); domain_id_ = DomainId::GetNode(HASH_TO_NODE_ID(lane_hash_)); } @@ -377,7 +383,7 @@ struct GetBlobTask : public Task, TaskFlags lane_hash_ = blob_id.hash_; domain_id_ = domain_id; } else { - lane_hash_ = std::hash{}(blob_name); + lane_hash_ = HashBlobName(tag_id, blob_name); domain_id_ = DomainId::GetNode(HASH_TO_NODE_ID(lane_hash_)); } @@ -592,7 +598,7 @@ struct GetBlobIdTask : public Task, TaskFlags { const hshm::charbuf &blob_name) : Task(alloc) { // Initialize task task_node_ = task_node; - lane_hash_ = std::hash{}(blob_name); + lane_hash_ = HashBlobName(tag_id, blob_name); prio_ = TaskPrio::kLowLatency; task_state_ = state_id; method_ = Method::kGetBlobId; @@ -725,7 +731,7 @@ struct GetBlobSizeTask : public Task, TaskFlags { lane_hash_ = blob_id.hash_; domain_id_ = domain_id; } else { - lane_hash_ = std::hash{}(blob_name); + lane_hash_ = HashBlobName(tag_id, blob_name); domain_id_ = DomainId::GetNode(HASH_TO_NODE_ID(lane_hash_)); } From 1f743bd08eba7eb465e7c73e647459fddbb75821 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Tue, 7 Nov 2023 12:42:31 -0600 Subject: [PATCH 006/114] Borg manipulates scores and can call reorganize --- include/hermes/hermes_types.h | 1 + include/hermes/score_histogram.h | 46 +++++++- tasks/bdev/include/bdev/bdev.h | 16 +-- tasks/bdev/include/bdev/bdev_tasks.h | 3 +- tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc | 105 ++++++++++++------- tasks/posix_bdev/src/posix_bdev.cc | 6 +- tasks/ram_bdev/src/ram_bdev.cc | 6 +- test/unit/hermes/test_bucket.cc | 5 +- test/unit/pipelines/test_borg.yaml | 10 ++ 9 files changed, 143 insertions(+), 55 deletions(-) create mode 100644 test/unit/pipelines/test_borg.yaml diff --git a/include/hermes/hermes_types.h b/include/hermes/hermes_types.h index d3dc50b2b..49ad4eb75 100644 --- a/include/hermes/hermes_types.h +++ b/include/hermes/hermes_types.h @@ -305,6 +305,7 @@ struct BlobInfo { blob_size_ = other.blob_size_; max_blob_size_ = other.max_blob_size_; score_ = other.score_; + user_score_ = other.user_score_; 
access_freq_ = other.access_freq_.load(); last_access_ = other.last_access_; mod_count_ = other.mod_count_.load(); diff --git a/include/hermes/score_histogram.h b/include/hermes/score_histogram.h index 4775e39a6..7341a5ed7 100644 --- a/include/hermes/score_histogram.h +++ b/include/hermes/score_histogram.h @@ -20,7 +20,7 @@ namespace hermes { struct HistEntry { - std::atomic x_; + std::atomic x_; /** Default constructor */ HistEntry() : x_(0) {} @@ -87,16 +87,25 @@ class Histogram { histogram_.resize(num_bins); } + /** Get the bin score belongs to */ + u32 GetBin(float score) { + u32 bin = score * histogram_.size(); + if (bin >= histogram_.size()) { + bin = histogram_.size() - 1; + } + return bin; + } + /** Increment histogram */ void Increment(float score) { - int bin = (int)(1.0/score - 1.0); + u32 bin = GetBin(score); histogram_[bin].increment(); count_.fetch_add(1); } /** Decrement histogram */ void Decrement(float score) { - int bin = (int)(1.0/score - 1.0); + u32 bin = GetBin(score); histogram_[bin].x_.fetch_sub(1); count_.fetch_sub(1); } @@ -104,15 +113,42 @@ class Histogram { /** * Determine if a blob should be elevated (1), * stationary (0), or demoted (-1) + * + * @input score a number between 0 and 1 + * @return Percentile (a number between 0 and 100) * */ - u16 GetPercentile(float score) { - int bin = (int)(1.0/score - 1.0); + u32 GetPercentile(float score) { + if (score == 0) { + return 0; + } + if (count_ == 0) { + return 100; + } + u32 bin = GetBin(score); u32 count = 0; for (u32 i = 0; i <= bin; ++i) { count += histogram_[i].x_.load(); } return count * 100 / count_; } + + /** + * Get quantile. + * @input percentile is a number between 0 and 100 + * */ + float GetQuantile(u32 percentile) { + u32 count = 0; + if (count_ == 0) { + return 0.0; + } + for (u32 i = 0; i < histogram_.size(); ++i) { + count += histogram_[i].x_.load(); + if (count * 100 / count_ >= percentile && count > 0) { + return (i + 1) / histogram_.size(); + } + } + return 0.0; + } }; } // namespace hermes diff --git a/tasks/bdev/include/bdev/bdev.h b/tasks/bdev/include/bdev/bdev.h index 057815ea4..a6d23ee67 100644 --- a/tasks/bdev/include/bdev/bdev.h +++ b/tasks/bdev/include/bdev/bdev.h @@ -21,6 +21,8 @@ class Client : public TaskLibClient { double bandwidth_; /**< the bandwidth of the device */ double latency_; /**< the latency of the device */ float score_; /**< Relative importance of this tier */ + f32 borg_min_thresh_; /**< Capacity percentage too low */ + f32 borg_max_thresh_; /**< Capacity percentage too high */ public: Client() : score_(0) {} @@ -31,6 +33,8 @@ class Client : public TaskLibClient { bandwidth_ = dev_info.bandwidth_; latency_ = dev_info.latency_; score_ = 0; + borg_min_thresh_ = dev_info.borg_min_thresh_; + borg_max_thresh_ = dev_info.borg_max_thresh_; } /** Async create task state */ @@ -153,15 +157,15 @@ class Client : public TaskLibClient { class Server { public: ssize_t rem_cap_; /**< Remaining capacity */ - // Histogram score_hist_; /**< Score distribution */ + Histogram score_hist_; /**< Score distribution */ public: /** Update the blob score in this tier */ void UpdateScore(UpdateScoreTask *task, RunContext &ctx) { -// if (task->old_score_ >= 0) { -// score_hist_.Decrement(task->old_score_); -// } -// score_hist_.Increment(task->new_score_); + if (task->old_score_ >= 0) { + score_hist_.Decrement(task->old_score_); + } + score_hist_.Increment(task->new_score_); } void MonitorUpdateScore(u32 mode, UpdateScoreTask *task, RunContext &ctx) { } @@ -169,7 +173,7 @@ class Server { /** Stat 
capacity and scores */ void StatBdev(StatBdevTask *task, RunContext &ctx) { task->rem_cap_ = rem_cap_; -// task->score_hist_ = score_hist_; + task->score_hist_ = score_hist_; } void MonitorStatBdev(u32 mode, StatBdevTask *task, RunContext &ctx) { } diff --git a/tasks/bdev/include/bdev/bdev_tasks.h b/tasks/bdev/include/bdev/bdev_tasks.h index 196edb399..28281c5f9 100644 --- a/tasks/bdev/include/bdev/bdev_tasks.h +++ b/tasks/bdev/include/bdev/bdev_tasks.h @@ -246,7 +246,7 @@ struct ReadTask : public Task, TaskFlags { /** A task to monitor bdev statistics */ struct StatBdevTask : public Task, TaskFlags { OUT size_t rem_cap_; /**< Remaining capacity of the target */ - // OUT Histogram score_hist_; /**< Score distribution */ + OUT Histogram score_hist_; /**< Score distribution */ /** SHM default constructor */ HSHM_ALWAYS_INLINE explicit @@ -272,6 +272,7 @@ struct StatBdevTask : public Task, TaskFlags { // Custom rem_cap_ = rem_cap; + score_hist_.Resize(10); } /** Create group */ diff --git a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc index 4e34a490b..f6676e8af 100644 --- a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc +++ b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc @@ -142,21 +142,37 @@ class Server : public TaskLib { } /** New score */ - float MakeScore(BlobInfo &blob_info, hshm::Timepoint &now) { - float freq_score = blob_info.access_freq_ / 5; - float access_score = (float)(1 - (blob_info.last_access_.GetSecFromStart(now) / 5)); - if (freq_score > 1) { - freq_score = 1; + float NormalizeScore(float score) { + if (score > 1) { + return 1; } - if (access_score > 1) { - access_score = 1; + if (score < 0) { + return 0; } - float data_score = std::max(freq_score, access_score); + return score; + } + float MakeScore(BlobInfo &blob_info, hshm::Timepoint &now) { + ServerConfig &server = HERMES_CONF->server_config_; + // Frequency score: how many times blob accessed? + float freq_min = server.borg_.freq_min_; + float freq_diff = server.borg_.freq_max_ - freq_min; + float freq_score = NormalizeScore((blob_info.access_freq_ - freq_min) / freq_diff); + // Temporal score: how recently the blob was accessed? + float time_diff = blob_info.last_access_.GetSecFromStart(now); + float rec_min = server.borg_.recency_min_; + float rec_max = server.borg_.recency_max_; + float rec_diff = rec_max - rec_min; + float temporal_score = NormalizeScore((time_diff - rec_min) / rec_diff); + temporal_score = 1 - temporal_score; + // Access score: was the blob accessed recently or frequently? 
+ float access_score = std::max(freq_score, temporal_score); float user_score = blob_info.user_score_; + // Final scores if (!blob_info.flags_.Any(HERMES_USER_SCORE_STATIONARY)) { - user_score *= data_score; + return user_score * access_score; + } else { + return std::max(access_score, user_score); } - return std::max(data_score, user_score); } /** Check if blob should be reorganized */ @@ -164,22 +180,39 @@ class Server : public TaskLib { bool ShouldReorganize(BlobInfo &blob_info, float score, TaskNode &task_node) { -// for (BufferInfo &buf : blob_info.buffers_) { -// TargetInfo &target = *target_map_[buf.tid_]; -// Histogram &hist = target.monitor_task_->score_hist_; -// if constexpr(UPDATE_SCORE) { -// target.AsyncUpdateScore(task_node + 1, -// blob_info.score_, score); -// } -// u32 percentile = hist.GetPercentile(score); -// size_t rem_cap = target.monitor_task_->rem_cap_; -// size_t max_cap = target.max_cap_; -// if (rem_cap < max_cap / 10) { -// if (percentile < 10 || percentile > 90) { -// return true; -// } -// } -// } + ServerConfig &server = HERMES_CONF->server_config_; + for (BufferInfo &buf : blob_info.buffers_) { + TargetInfo &target = *target_map_[buf.tid_]; + Histogram &hist = target.monitor_task_->score_hist_; + u32 percentile = hist.GetPercentile(score); + size_t rem_cap = target.monitor_task_->rem_cap_; + size_t max_cap = target.max_cap_; + float min_score = hist.GetQuantile(0); + // Update the target score + if (rem_cap < max_cap * .5) { + // Enough capacity has been used to make scoring important. + target.score_ = min_score; + } else { + // There's a lot of capacity left. + // Make DPE start placing data here. + target.score_ = 0; + } + // Update blob score + if constexpr(UPDATE_SCORE) { + u32 bin_orig = hist.GetBin(blob_info.score_); + u32 bin_new = hist.GetBin(score); + if (bin_orig != bin_new) { + target.AsyncUpdateScore(task_node + 1, + blob_info.score_, score); + } + } + // Determine if the blob should be reorganized + if (rem_cap <= max_cap * target.borg_min_thresh_) { + if (percentile < 10) { + return true; + } + } + } return false; } @@ -196,15 +229,15 @@ class Server : public TaskLib { BlobInfo &blob_info = it.second; // Update blob scores // TODO(llogan): Add back -// float new_score = MakeScore(blob_info, now); -// if (ShouldReorganize(blob_info, new_score, task->task_node_)) { -// blob_mdm_.AsyncReorganizeBlob(task->task_node_ + 1, -// blob_info.tag_id_, -// blob_info.blob_id_, -// new_score, 0, false); -// } -// blob_info.access_freq_ = 0; -// blob_info.score_ = new_score; + float new_score = MakeScore(blob_info, now); + if (ShouldReorganize(blob_info, new_score, task->task_node_)) { + blob_mdm_.AsyncReorganizeBlob(task->task_node_ + 1, + blob_info.tag_id_, + blob_info.blob_id_, + new_score, 0, false); + } + blob_info.access_freq_ = 0; + blob_info.score_ = new_score; // Flush data size_t mod_count = blob_info.mod_count_; @@ -256,6 +289,8 @@ class Server : public TaskLib { HILOG(kDebug, "Beginning PUT for {}", blob_name.str()); BLOB_MAP_T &blob_map = blob_map_[rctx.lane_id_]; BlobInfo &blob_info = blob_map[task->blob_id_]; + blob_info.score_ = task->score_; + blob_info.user_score_ = task->score_; // Stage Blob if (task->flags_.Any(HERMES_IS_FILE) && blob_info.last_flush_ == 0) { diff --git a/tasks/posix_bdev/src/posix_bdev.cc b/tasks/posix_bdev/src/posix_bdev.cc index 4474ef93c..393762563 100644 --- a/tasks/posix_bdev/src/posix_bdev.cc +++ b/tasks/posix_bdev/src/posix_bdev.cc @@ -26,7 +26,7 @@ class Server : public TaskLib, public bdev::Server { DeviceInfo 
&dev_info = task->info_; rem_cap_ = dev_info.capacity_; alloc_.Init(id_, dev_info.capacity_, dev_info.slab_sizes_); - // score_hist_.Resize(10); + score_hist_.Resize(10); std::string text = dev_info.mount_dir_ + "/" + "slab_" + dev_info.dev_name_; auto canon = stdfs::weakly_canonical(text).string(); @@ -56,7 +56,7 @@ class Server : public TaskLib, public bdev::Server { alloc_.Allocate(task->size_, *task->buffers_, task->alloc_size_); HILOG(kDebug, "Allocated {}/{} bytes ({})", task->alloc_size_, task->size_, path_); rem_cap_ -= task->alloc_size_; - // score_hist_.Increment(task->score_); + score_hist_.Increment(task->score_); task->SetModuleComplete(); } void MonitorAllocate(u32 mode, AllocateTask *task, RunContext &rctx) { @@ -65,7 +65,7 @@ class Server : public TaskLib, public bdev::Server { /** Free space from bdev */ void Free(FreeTask *task, RunContext &rctx) { rem_cap_ += alloc_.Free(task->buffers_); - // score_hist_.Decrement(task->score_); + score_hist_.Decrement(task->score_); task->SetModuleComplete(); } void MonitorFree(u32 mode, FreeTask *task, RunContext &rctx) { diff --git a/tasks/ram_bdev/src/ram_bdev.cc b/tasks/ram_bdev/src/ram_bdev.cc index ae15db7b2..392decba3 100644 --- a/tasks/ram_bdev/src/ram_bdev.cc +++ b/tasks/ram_bdev/src/ram_bdev.cc @@ -21,7 +21,7 @@ class Server : public TaskLib, public bdev::Server { rem_cap_ = dev_info.capacity_; alloc_.Init(id_, dev_info.capacity_, dev_info.slab_sizes_); mem_ptr_ = (char*)malloc(dev_info.capacity_); - // score_hist_.Resize(10); + score_hist_.Resize(10); HILOG(kDebug, "Created {} at {} of size {}", dev_info.dev_name_, dev_info.mount_point_, dev_info.capacity_); task->SetModuleComplete(); @@ -42,7 +42,7 @@ class Server : public TaskLib, public bdev::Server { HILOG(kDebug, "Allocating {} bytes (RAM)", task->size_); alloc_.Allocate(task->size_, *task->buffers_, task->alloc_size_); rem_cap_ -= task->alloc_size_; - // score_hist_.Increment(task->score_); + score_hist_.Increment(task->score_); HILOG(kDebug, "Allocated {} bytes (RAM)", task->alloc_size_); task->SetModuleComplete(); } @@ -52,7 +52,7 @@ class Server : public TaskLib, public bdev::Server { /** Free space to bdev */ void Free(FreeTask *task, RunContext &rctx) { rem_cap_ += alloc_.Free(task->buffers_); - // score_hist_.Decrement(task->score_); + score_hist_.Decrement(task->score_); task->SetModuleComplete(); } void MonitorFree(u32 mode, FreeTask *task, RunContext &rctx) { diff --git a/test/unit/hermes/test_bucket.cc b/test/unit/hermes/test_bucket.cc index 3f3edb2ce..2cd437d24 100644 --- a/test/unit/hermes/test_bucket.cc +++ b/test/unit/hermes/test_bucket.cc @@ -108,14 +108,14 @@ TEST_CASE("TestHermesPutGet") { hermes::Bucket bkt("hello"); HILOG(kInfo, "BUCKET LOADED!!!") - size_t count_per_proc = 4; + size_t count_per_proc = 16; size_t off = rank * count_per_proc; size_t proc_count = off + count_per_proc; for (int rep = 0; rep < 4; ++rep) { for (size_t i = off; i < proc_count; ++i) { HILOG(kInfo, "Iteration: {} with blob name {}", i, std::to_string(i)); // Put a blob - hermes::Blob blob(KILOBYTES(4)); + hermes::Blob blob(MEGABYTES(1)); memset(blob.data(), i % 256, blob.size()); hermes::BlobId blob_id = bkt.Put(std::to_string(i), blob, ctx); HILOG(kInfo, "(iteration {}) Using BlobID: {}", i, blob_id); @@ -125,6 +125,7 @@ TEST_CASE("TestHermesPutGet") { REQUIRE(blob.size() == blob2.size()); REQUIRE(blob == blob2); } + sleep(5); } } diff --git a/test/unit/pipelines/test_borg.yaml b/test/unit/pipelines/test_borg.yaml new file mode 100644 index 000000000..6e3c827be --- /dev/null 
+++ b/test/unit/pipelines/test_borg.yaml @@ -0,0 +1,10 @@ +name: hermes_unit_ior +env: hermes +pkgs: + - pkg_type: hermes_run + pkg_name: hermes_run + ram: 10m + sleep: 5 + - pkg_type: hermes_unit_tests + pkg_name: hermes_unit_tests + TEST_CASE: TestHermesPutGet From 8376dd53e6d186ad1d36d3c4dfc4503d7643e783 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Tue, 7 Nov 2023 13:02:31 -0600 Subject: [PATCH 007/114] configs work --- include/hermes/config_manager.h | 4 ++-- test/unit/pipelines/test_borg.yaml | 5 +++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/include/hermes/config_manager.h b/include/hermes/config_manager.h index 1cf44a040..9b982b0d8 100644 --- a/include/hermes/config_manager.h +++ b/include/hermes/config_manager.h @@ -62,7 +62,7 @@ class ConfigurationManager { is_initialized_ = true; } - void LoadClientConfig(std::string &config_path) { + void LoadClientConfig(std::string config_path) { // Load hermes config if (config_path.empty()) { config_path = GetEnvSafe(Constant::kHermesClientConf); @@ -71,7 +71,7 @@ class ConfigurationManager { client_config_.LoadFromFile(config_path); } - void LoadServerConfig(std::string &config_path) { + void LoadServerConfig(std::string config_path) { // Load hermes config if (config_path.empty()) { config_path = GetEnvSafe(Constant::kHermesServerConf); diff --git a/test/unit/pipelines/test_borg.yaml b/test/unit/pipelines/test_borg.yaml index 6e3c827be..7b60e026f 100644 --- a/test/unit/pipelines/test_borg.yaml +++ b/test/unit/pipelines/test_borg.yaml @@ -3,8 +3,13 @@ env: hermes pkgs: - pkg_type: hermes_run pkg_name: hermes_run + recency_max: 1 ram: 10m sleep: 5 + do_dbg: true + dbg_port: 4000 - pkg_type: hermes_unit_tests pkg_name: hermes_unit_tests TEST_CASE: TestHermesPutGet + do_dbg: true + dbg_port: 4001 From 7a6ac6ef89f24423b48d85867582a711c679219b Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Tue, 7 Nov 2023 13:35:07 -0600 Subject: [PATCH 008/114] Use percentile_lt --- include/hermes/score_histogram.h | 19 ++++++++++++++++--- tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc | 4 ++++ 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/include/hermes/score_histogram.h b/include/hermes/score_histogram.h index 7341a5ed7..5f503495d 100644 --- a/include/hermes/score_histogram.h +++ b/include/hermes/score_histogram.h @@ -117,7 +117,8 @@ class Histogram { * @input score a number between 0 and 1 * @return Percentile (a number between 0 and 100) * */ - u32 GetPercentile(float score) { + template + u32 GetPercentileBase(float score) { if (score == 0) { return 0; } @@ -126,11 +127,23 @@ class Histogram { } u32 bin = GetBin(score); u32 count = 0; - for (u32 i = 0; i <= bin; ++i) { - count += histogram_[i].x_.load(); + if (LESS_THAN_BIN) { + for (u32 i = 0; i <= bin; ++i) { + count += histogram_[i].x_.load(); + } + } else { + for (u32 i = 0; i < bin; ++i) { + count += histogram_[i].x_.load(); + } } return count * 100 / count_; } + u32 GetPercentile(float score) { + return GetPercentileBase(score); + } + u32 GetPercentileLT(float score) { + return GetPercentileBase(score); + } /** * Get quantile. 
diff --git a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc index f6676e8af..1cf50a36c 100644 --- a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc +++ b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc @@ -185,6 +185,7 @@ class Server : public TaskLib { TargetInfo &target = *target_map_[buf.tid_]; Histogram &hist = target.monitor_task_->score_hist_; u32 percentile = hist.GetPercentile(score); + u32 precentile_lt = hist.GetPercentileLT(score); size_t rem_cap = target.monitor_task_->rem_cap_; size_t max_cap = target.max_cap_; float min_score = hist.GetQuantile(0); @@ -211,6 +212,9 @@ class Server : public TaskLib { if (percentile < 10) { return true; } + if (precentile_lt > 90) { + return false; + } } } return false; From 8431ea5c4b7f5bb4ca27afb96c27ca8a213dfbc7 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Tue, 7 Nov 2023 13:35:23 -0600 Subject: [PATCH 009/114] Use percentile_lt --- tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc index 1cf50a36c..39a3f8a4d 100644 --- a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc +++ b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc @@ -209,12 +209,9 @@ class Server : public TaskLib { } // Determine if the blob should be reorganized if (rem_cap <= max_cap * target.borg_min_thresh_) { - if (percentile < 10) { + if (percentile < 10 || precentile_lt > 90) { return true; } - if (precentile_lt > 90) { - return false; - } } } return false; From 5fdaadb582029d71a3e7b71586a0b204cb663c10 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Tue, 7 Nov 2023 13:44:38 -0600 Subject: [PATCH 010/114] Use percentile_lt before rem_cap check --- tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc index 39a3f8a4d..e093e9a1f 100644 --- a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc +++ b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc @@ -208,8 +208,11 @@ class Server : public TaskLib { } } // Determine if the blob should be reorganized + if (precentile_lt > 90) { + return true; + } if (rem_cap <= max_cap * target.borg_min_thresh_) { - if (percentile < 10 || precentile_lt > 90) { + if (percentile < 10) { return true; } } From 4f8c2c3f047523d31c39ebd21e078c6e8647eb6a Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Tue, 7 Nov 2023 13:50:06 -0600 Subject: [PATCH 011/114] Actually free buffers --- tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc index e093e9a1f..49eaa4a74 100644 --- a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc +++ b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc @@ -449,9 +449,9 @@ class Server : public TaskLib { for (BufferInfo &buf : blob_info.buffers_) { TargetInfo &target = *target_map_[buf.tid_]; std::vector buf_vec = {buf}; -// target.AsyncFree(task->task_node_ + 1, -// blob_info.score_, -// std::move(buf_vec), true); + target.AsyncFree(task->task_node_ + 1, + blob_info.score_, + std::move(buf_vec), true); } blob_info.buffers_.clear(); blob_info.max_blob_size_ = 0; From 04af5d0187c73f2403937a8b11e952f457a250ac Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Tue, 7 Nov 2023 14:04:56 -0600 Subject: [PATCH 012/114] Cosmetic changes --- 
include/hermes/score_histogram.h | 4 ++-- tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/include/hermes/score_histogram.h b/include/hermes/score_histogram.h index 5f503495d..d7db6e0b8 100644 --- a/include/hermes/score_histogram.h +++ b/include/hermes/score_histogram.h @@ -117,7 +117,7 @@ class Histogram { * @input score a number between 0 and 1 * @return Percentile (a number between 0 and 100) * */ - template + template u32 GetPercentileBase(float score) { if (score == 0) { return 0; @@ -127,7 +127,7 @@ class Histogram { } u32 bin = GetBin(score); u32 count = 0; - if (LESS_THAN_BIN) { + if (LESS_THAN_EQUAL) { for (u32 i = 0; i <= bin; ++i) { count += histogram_[i].x_.load(); } diff --git a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc index 49eaa4a74..1fafb9713 100644 --- a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc +++ b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc @@ -232,7 +232,6 @@ class Server : public TaskLib { for (auto &it : blob_map) { BlobInfo &blob_info = it.second; // Update blob scores - // TODO(llogan): Add back float new_score = MakeScore(blob_info, now); if (ShouldReorganize(blob_info, new_score, task->task_node_)) { blob_mdm_.AsyncReorganizeBlob(task->task_node_ + 1, From ab9f3784db900b68e9c3a4cc11a3f9d427935de3 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Tue, 7 Nov 2023 14:20:32 -0600 Subject: [PATCH 013/114] Print statement changes --- include/hermes/dpe/minimize_io_time.h | 1 - tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc | 9 ++++----- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/include/hermes/dpe/minimize_io_time.h b/include/hermes/dpe/minimize_io_time.h index e4a7c8b1d..7c09b6c96 100644 --- a/include/hermes/dpe/minimize_io_time.h +++ b/include/hermes/dpe/minimize_io_time.h @@ -44,7 +44,6 @@ class MinimizeIoTime : public Dpe { size_t rem_cap = target.GetRemCap(); if (target.score_ > score || rem_cap < blob_size) { // TODO(llogan): add other considerations of this Dpe - HILOG(kInfo, "Not enough space or score in {} of bw {}", target.id_, target.bandwidth_) continue; } if (ctx.blob_score_ == -1) { diff --git a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc index 1fafb9713..bf2afcd42 100644 --- a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc +++ b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc @@ -346,15 +346,14 @@ class Server : public TaskLib { placement.size_, blob_info.buffers_); alloc_task->Wait(task); - HILOG(kInfo, "(node {}) Placing {}/{} bytes in target {} of bw {}", - HRUN_CLIENT->node_id_, - alloc_task->alloc_size_, task->data_size_, - placement.tid_, bdev.bandwidth_) +// HILOG(kInfo, "(node {}) Placing {}/{} bytes in target {} of bw {}", +// HRUN_CLIENT->node_id_, +// alloc_task->alloc_size_, task->data_size_, +// placement.tid_, bdev.bandwidth_) if (alloc_task->alloc_size_ < alloc_task->size_) { SubPlacement &next_placement = schema.plcmnts_[sub_idx + 1]; size_t diff = alloc_task->size_ - alloc_task->alloc_size_; next_placement.size_ += diff; - HILOG(kInfo, "Delegating more space to the next task ({} bytes)", diff); } // bdev.monitor_task_->rem_cap_ -= alloc_task->alloc_size_; HRUN_CLIENT->DelTask(alloc_task); From 06db975b69f328592cdfe530397e4004e2b69d25 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Tue, 7 Nov 2023 14:37:56 -0600 Subject: [PATCH 014/114] Add experimental data pointer change --- hrun/include/hrun/api/hrun_client.h | 25 +++++++++++++------ 
.../remote_queue/src/remote_queue.cc | 2 +- .../data_stager/factory/binary_stager.h | 2 +- tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc | 8 +++--- tasks/hermes_data_op/src/hermes_data_op.cc | 4 +-- 5 files changed, 26 insertions(+), 15 deletions(-) diff --git a/hrun/include/hrun/api/hrun_client.h b/hrun/include/hrun/api/hrun_client.h index 82c91372d..75ab87e42 100644 --- a/hrun/include/hrun/api/hrun_client.h +++ b/hrun/include/hrun/api/hrun_client.h @@ -240,14 +240,18 @@ class Client : public ConfigurationManager { } /** Allocate a buffer in a task */ - template + template HSHM_ALWAYS_INLINE LPointer AllocateBuffer(size_t size, Task *yield_task) { LPointer p; // HILOG(kInfo, "Heap size: {}", data_alloc_->GetCurrentlyAllocatedSize()); while (true) { try { - p = data_alloc_->AllocateLocalPtr(size); + if constexpr(IN_CLIENT) { + p = data_alloc_->AllocateLocalPtr(size); + } else { + p = main_alloc_->AllocateLocalPtr(size); + } } catch (...) { p.shm_.SetNull(); } @@ -261,14 +265,18 @@ class Client : public ConfigurationManager { } /** Allocate a buffer */ - template + template HSHM_ALWAYS_INLINE LPointer AllocateBuffer(size_t size) { // HILOG(kInfo, "{} Heap size: {}", THREAD_MODEL, data_alloc_->GetCurrentlyAllocatedSize()); LPointer p; while (true) { try { - p = data_alloc_->AllocateLocalPtr(size); + if constexpr(IN_CLIENT) { + p = data_alloc_->AllocateLocalPtr(size); + } else { + p = main_alloc_->AllocateLocalPtr(size); + } } catch (...) { p.shm_.SetNull(); } @@ -285,21 +293,24 @@ class Client : public ConfigurationManager { HSHM_ALWAYS_INLINE void FreeBuffer(hipc::Pointer &p) { // HILOG(kInfo, "Heap size: {}", data_alloc_->GetCurrentlyAllocatedSize()); - data_alloc_->Free(p); + auto alloc = HERMES_MEMORY_MANAGER->GetAllocator(p.allocator_id_); + alloc->Free(p); } /** Free a buffer */ HSHM_ALWAYS_INLINE void FreeBuffer(LPointer &p) { // HILOG(kInfo, "Heap size: {}", data_alloc_->GetCurrentlyAllocatedSize()); - data_alloc_->FreeLocalPtr(p); + auto alloc = HERMES_MEMORY_MANAGER->GetAllocator(p.shm_.allocator_id_); + alloc->FreeLocalPtr(p); } /** Convert pointer to char* */ template HSHM_ALWAYS_INLINE T* GetDataPointer(const hipc::Pointer &p) { - return data_alloc_->Convert(p); + auto alloc = HERMES_MEMORY_MANAGER->GetAllocator(p.allocator_id_); + return alloc->Convert(p); } /** Get a queue by its ID */ diff --git a/hrun/tasks_required/remote_queue/src/remote_queue.cc b/hrun/tasks_required/remote_queue/src/remote_queue.cc index 8c30089b2..3bfc5ac24 100644 --- a/hrun/tasks_required/remote_queue/src/remote_queue.cc +++ b/hrun/tasks_required/remote_queue/src/remote_queue.cc @@ -379,7 +379,7 @@ class Server : public TaskLib { size_t data_size, IoType io_type) { LPointer data = - HRUN_CLIENT->AllocateBuffer(data_size); + HRUN_CLIENT->AllocateBuffer(data_size); // Create the input data transfer object std::vector xfer(2); diff --git a/tasks/data_stager/include/data_stager/factory/binary_stager.h b/tasks/data_stager/include/data_stager/factory/binary_stager.h index 8ddae035f..5e6f2c7ba 100644 --- a/tasks/data_stager/include/data_stager/factory/binary_stager.h +++ b/tasks/data_stager/include/data_stager/factory/binary_stager.h @@ -54,7 +54,7 @@ class BinaryFileStager : public AbstractStager { plcmnt.DecodeBlobName(*task->blob_name_, page_size_); HILOG(kDebug, "Attempting to stage {} bytes from the backend file {} at offset {}", page_size_, url_, plcmnt.bucket_off_); - LPointer blob = HRUN_CLIENT->AllocateBuffer(page_size_); + LPointer blob = HRUN_CLIENT->AllocateBuffer(page_size_); fd_ = 
HERMES_POSIX_API->open(path_.c_str(), O_CREAT | O_RDWR, 0666); if (fd_ < 0) { HELOG(kError, "Failed to open file {}", path_); diff --git a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc index bf2afcd42..3de2d617c 100644 --- a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc +++ b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc @@ -248,8 +248,8 @@ class Server : public TaskLib { mod_count > blob_info.last_flush_) { HILOG(kDebug, "Flushing blob {} (mod_count={}, last_flush={})", blob_info.blob_id_, blob_info.mod_count_, blob_info.last_flush_); - LPointer data = HRUN_CLIENT->AllocateBuffer(blob_info.blob_size_, - task); + LPointer data = HRUN_CLIENT->AllocateBuffer( + blob_info.blob_size_, task); LPointer get_blob = blob_mdm_.AsyncGetBlob(task->task_node_ + 1, blob_info.tag_id_, @@ -815,8 +815,8 @@ class Server : public TaskLib { task->SetModuleComplete(); return; } - task->data_ = HRUN_CLIENT->AllocateBuffer(blob_info.blob_size_, - task).shm_; + task->data_ = HRUN_CLIENT->AllocateBuffer( + blob_info.blob_size_, task).shm_; task->data_size_ = blob_info.blob_size_; task->get_task_ = blob_mdm_.AsyncGetBlob(task->task_node_ + 1, task->tag_id_, diff --git a/tasks/hermes_data_op/src/hermes_data_op.cc b/tasks/hermes_data_op/src/hermes_data_op.cc index 08f0b73b3..f46e02fb9 100644 --- a/tasks/hermes_data_op/src/hermes_data_op.cc +++ b/tasks/hermes_data_op/src/hermes_data_op.cc @@ -183,7 +183,7 @@ class Server : public TaskLib { for (OpData &data : op_data) { // Get the input data LPointer data_ptr = - HRUN_CLIENT->AllocateBuffer(data.size_, task); + HRUN_CLIENT->AllocateBuffer(data.size_, task); LPointer in_task = blob_mdm_.AsyncGetBlob(task->task_node_ + 1, data.bkt_id_, @@ -203,7 +203,7 @@ class Server : public TaskLib { // Calaculate the minimum LPointer min_lptr = - HRUN_CLIENT->AllocateBuffer(sizeof(float), task); + HRUN_CLIENT->AllocateBuffer(sizeof(float), task); float *min_ptr = (float*)min_lptr.ptr_; *min_ptr = std::numeric_limits::max(); for (size_t i = 0; i < in_task->data_size_; i += sizeof(float)) { From fbc574eb51609d18464a7f15093a49d355a0ed84 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Tue, 7 Nov 2023 14:47:45 -0600 Subject: [PATCH 015/114] Don't do main alloc for data for now --- hrun/include/hrun/api/hrun_client.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hrun/include/hrun/api/hrun_client.h b/hrun/include/hrun/api/hrun_client.h index 75ab87e42..d3047fa3a 100644 --- a/hrun/include/hrun/api/hrun_client.h +++ b/hrun/include/hrun/api/hrun_client.h @@ -250,7 +250,7 @@ class Client : public ConfigurationManager { if constexpr(IN_CLIENT) { p = data_alloc_->AllocateLocalPtr(size); } else { - p = main_alloc_->AllocateLocalPtr(size); + p = data_alloc_->AllocateLocalPtr(size); } } catch (...) { p.shm_.SetNull(); @@ -275,7 +275,7 @@ class Client : public ConfigurationManager { if constexpr(IN_CLIENT) { p = data_alloc_->AllocateLocalPtr(size); } else { - p = main_alloc_->AllocateLocalPtr(size); + p = data_alloc_->AllocateLocalPtr(size); } } catch (...) 
{ p.shm_.SetNull(); From a0438fb4e7dc77f797167706ec765a40adf1be1b Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Tue, 7 Nov 2023 14:50:02 -0600 Subject: [PATCH 016/114] Stop gcc from complaining --- test/unit/hermes_adapters/filesystem_tests.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/test/unit/hermes_adapters/filesystem_tests.h b/test/unit/hermes_adapters/filesystem_tests.h index daffba071..95661f7e6 100644 --- a/test/unit/hermes_adapters/filesystem_tests.h +++ b/test/unit/hermes_adapters/filesystem_tests.h @@ -241,7 +241,10 @@ class FilesystemTests { if (fd == -1) { HELOG(kFatal, "Failed to open file: {}", path); } - write(fd, data.data(), data.size()); + int ret = write(fd, data.data(), data.size()); + if (ret != data.size()) { + return; + } close(fd); REQUIRE(stdfs::file_size(path) == data.size()); } From de790df8ad6badaa6d28e874c31a4e05135e6d76 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Tue, 7 Nov 2023 14:52:46 -0600 Subject: [PATCH 017/114] Add prints to blob --- tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc index 3de2d617c..c667cea9b 100644 --- a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc +++ b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc @@ -209,10 +209,12 @@ class Server : public TaskLib { } // Determine if the blob should be reorganized if (precentile_lt > 90) { + HILOG(kInfo, "Should reorganize based on max"); return true; } if (rem_cap <= max_cap * target.borg_min_thresh_) { if (percentile < 10) { + HILOG(kInfo, "Should reorganize based on rem"); return true; } } From 2131997faebbfa6481e72b354fad374b7d5936cb Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Tue, 7 Nov 2023 14:55:28 -0600 Subject: [PATCH 018/114] Put percentile back --- tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc index c667cea9b..2d5d768ac 100644 --- a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc +++ b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc @@ -208,11 +208,11 @@ class Server : public TaskLib { } } // Determine if the blob should be reorganized - if (precentile_lt > 90) { - HILOG(kInfo, "Should reorganize based on max"); - return true; - } if (rem_cap <= max_cap * target.borg_min_thresh_) { + if (precentile_lt > 90) { + HILOG(kInfo, "Should reorganize based on max"); + return true; + } if (percentile < 10) { HILOG(kInfo, "Should reorganize based on rem"); return true; From b695c60eedbb617ab2839af3d5d70debacb257ba Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Tue, 7 Nov 2023 16:43:15 -0600 Subject: [PATCH 019/114] Make borg reorganize based on fixed tier score --- include/hermes/dpe/minimize_io_time.h | 4 +- tasks/bdev/include/bdev/bdev.h | 1 + tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc | 45 +++++++++++++------- 3 files changed, 34 insertions(+), 16 deletions(-) diff --git a/include/hermes/dpe/minimize_io_time.h b/include/hermes/dpe/minimize_io_time.h index 7c09b6c96..ecdc6da3c 100644 --- a/include/hermes/dpe/minimize_io_time.h +++ b/include/hermes/dpe/minimize_io_time.h @@ -42,7 +42,9 @@ class MinimizeIoTime : public Dpe { // NOTE(llogan): We skip targets that are too high of priority or // targets that can't fit the ENTIRE blob size_t rem_cap = target.GetRemCap(); - if (target.score_ > score || rem_cap < blob_size) { + // TODO(llogan): add back 
+ // if (target.score_ > score || rem_cap < blob_size) { + if (rem_cap < blob_size) { // TODO(llogan): add other considerations of this Dpe continue; } diff --git a/tasks/bdev/include/bdev/bdev.h b/tasks/bdev/include/bdev/bdev.h index a6d23ee67..44bf27258 100644 --- a/tasks/bdev/include/bdev/bdev.h +++ b/tasks/bdev/include/bdev/bdev.h @@ -21,6 +21,7 @@ class Client : public TaskLibClient { double bandwidth_; /**< the bandwidth of the device */ double latency_; /**< the latency of the device */ float score_; /**< Relative importance of this tier */ + float bw_score_; /**< Relative importance of this tier */ f32 borg_min_thresh_; /**< Capacity percentage too low */ f32 borg_max_thresh_; /**< Capacity percentage too high */ diff --git a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc index 2d5d768ac..c8f0ba788 100644 --- a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc +++ b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc @@ -94,6 +94,11 @@ class Server : public TaskLib { [](const bdev::Client &a, const bdev::Client &b) { return a.bandwidth_ > b.bandwidth_; }); + float bw_max = targets_.front().bandwidth_; + float bw_min = targets_.front().bandwidth_; + for (bdev::Client &client : targets_) { + client.bw_score_ = (client.bandwidth_ - bw_min) / (bw_max - bw_min); + } for (bdev::Client &client : targets_) { target_map_.emplace(client.id_, &client); HILOG(kInfo, "(node {}) Target {} has bw {}", HRUN_CLIENT->node_id_, @@ -188,16 +193,17 @@ class Server : public TaskLib { u32 precentile_lt = hist.GetPercentileLT(score); size_t rem_cap = target.monitor_task_->rem_cap_; size_t max_cap = target.max_cap_; - float min_score = hist.GetQuantile(0); + // float min_score = hist.GetQuantile(0); // Update the target score - if (rem_cap < max_cap * .5) { - // Enough capacity has been used to make scoring important. - target.score_ = min_score; - } else { - // There's a lot of capacity left. - // Make DPE start placing data here. - target.score_ = 0; - } + target.score_ = target.bw_score_; +// if (rem_cap < max_cap * .5) { +// // Enough capacity has been used to make scoring important. +// target.score_ = target.bw_score_; +// } else { +// // There's a lot of capacity left. +// // Make DPE start placing data here. 
+// target.score_ = 0; +// } // Update blob score if constexpr(UPDATE_SCORE) { u32 bin_orig = hist.GetBin(blob_info.score_); @@ -208,16 +214,25 @@ class Server : public TaskLib { } } // Determine if the blob should be reorganized - if (rem_cap <= max_cap * target.borg_min_thresh_) { - if (precentile_lt > 90) { - HILOG(kInfo, "Should reorganize based on max"); - return true; + // Get the target with minimum difference in score to this blob + for (const bdev::Client &cmp_tgt: targets_) { + if (cmp_tgt.score_ > score + .05) { + continue; } - if (percentile < 10) { - HILOG(kInfo, "Should reorganize based on rem"); + if (cmp_tgt.id_ != target.id_) { return true; } } +// if (rem_cap <= max_cap * target.borg_min_thresh_) { +// if (precentile_lt > 90) { +// HILOG(kInfo, "Should reorganize based on max"); +// return true; +// } +// if (percentile < 10) { +// HILOG(kInfo, "Should reorganize based on rem"); +// return true; +// } +// } } return false; } From 2f884ea1910c4bcb46730138d8f3bba56b3d5b11 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Tue, 7 Nov 2023 17:05:02 -0600 Subject: [PATCH 020/114] Don't reorganize unless score diff is high --- tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc index c8f0ba788..83bb83076 100644 --- a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc +++ b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc @@ -215,6 +215,9 @@ class Server : public TaskLib { } // Determine if the blob should be reorganized // Get the target with minimum difference in score to this blob + if (abs(target.score_ - score) < .1) { + return false; + } for (const bdev::Client &cmp_tgt: targets_) { if (cmp_tgt.score_ > score + .05) { continue; From 53358b92531b9ba733fd45e21a5b199fdbddbf88 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Tue, 7 Nov 2023 17:21:34 -0600 Subject: [PATCH 021/114] Even more basic borg --- tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc | 23 ++++++++++---------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc index 83bb83076..1a6d8adcc 100644 --- a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc +++ b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc @@ -174,7 +174,8 @@ class Server : public TaskLib { float user_score = blob_info.user_score_; // Final scores if (!blob_info.flags_.Any(HERMES_USER_SCORE_STATIONARY)) { - return user_score * access_score; + // return user_score * access_score; + return user_score; } else { return std::max(access_score, user_score); } @@ -215,17 +216,17 @@ class Server : public TaskLib { } // Determine if the blob should be reorganized // Get the target with minimum difference in score to this blob - if (abs(target.score_ - score) < .1) { - return false; - } - for (const bdev::Client &cmp_tgt: targets_) { - if (cmp_tgt.score_ > score + .05) { - continue; - } - if (cmp_tgt.id_ != target.id_) { - return true; - } + if (abs(target.score_ - score) > .1) { + return true; } +// for (const bdev::Client &cmp_tgt: targets_) { +// if (cmp_tgt.score_ > score + .05) { +// continue; +// } +// if (cmp_tgt.id_ != target.id_) { +// return true; +// } +// } // if (rem_cap <= max_cap * target.borg_min_thresh_) { // if (precentile_lt > 90) { // HILOG(kInfo, "Should reorganize based on max"); From 74ba1c07f755fc499a902f51878efbc4517aed4b Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Tue, 7 Nov 2023 17:23:35 -0600 
Subject: [PATCH 022/114] Remove access --- tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc index 1a6d8adcc..125f7cf3e 100644 --- a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc +++ b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc @@ -216,17 +216,17 @@ class Server : public TaskLib { } // Determine if the blob should be reorganized // Get the target with minimum difference in score to this blob - if (abs(target.score_ - score) > .1) { - return true; + if (abs(target.score_ - score) < .1) { + return false; + } + for (const bdev::Client &cmp_tgt: targets_) { + if (cmp_tgt.score_ > score + .05) { + continue; + } + if (cmp_tgt.id_ != target.id_) { + return true; + } } -// for (const bdev::Client &cmp_tgt: targets_) { -// if (cmp_tgt.score_ > score + .05) { -// continue; -// } -// if (cmp_tgt.id_ != target.id_) { -// return true; -// } -// } // if (rem_cap <= max_cap * target.borg_min_thresh_) { // if (precentile_lt > 90) { // HILOG(kInfo, "Should reorganize based on max"); From fc5a1cdd82f2de0d358b4302fc4e7459098387bf Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Tue, 7 Nov 2023 17:25:11 -0600 Subject: [PATCH 023/114] BORG --- tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc index 125f7cf3e..94b83585f 100644 --- a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc +++ b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc @@ -224,6 +224,8 @@ class Server : public TaskLib { continue; } if (cmp_tgt.id_ != target.id_) { + HILOG(kInfo, "Should move from {} to {} with score {} and tier score {}", + target.id_, cmp_tgt.score_, score, cmp_tgt.score_); return true; } } From 88a9ceb25972717e354011f46bf35825ee7921e1 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Tue, 7 Nov 2023 17:27:20 -0600 Subject: [PATCH 024/114] Make flush second --- .../include/hermes_blob_mdm/hermes_blob_mdm_tasks.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h b/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h index 90fb30e6d..16691a8c3 100644 --- a/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h +++ b/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h @@ -1167,7 +1167,7 @@ struct FlushDataTask : public Task, TaskFlags { TASK_LONG_RUNNING | TASK_COROUTINE | TASK_REMOTE_DEBUG_MARK); - SetPeriodMs(5); // TODO(llogan): don't hardcode this + SetPeriodSec(1); // TODO(llogan): don't hardcode this domain_id_ = DomainId::GetLocal(); } From 34bd56efff827cf8dadae3e6b1b110017817442d Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Tue, 7 Nov 2023 17:37:04 -0600 Subject: [PATCH 025/114] Bandwidth fix --- tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc index 94b83585f..06af828a5 100644 --- a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc +++ b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc @@ -95,7 +95,7 @@ class Server : public TaskLib { return a.bandwidth_ > b.bandwidth_; }); float bw_max = targets_.front().bandwidth_; - float bw_min = targets_.front().bandwidth_; + float bw_min = targets_.back().bandwidth_; for (bdev::Client 
&client : targets_) { client.bw_score_ = (client.bandwidth_ - bw_min) / (bw_max - bw_min); } From 8e390d21c69e2d94d693e7f42bb7358da89ac5e3 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Tue, 7 Nov 2023 17:40:42 -0600 Subject: [PATCH 026/114] Score fix --- tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc index 06af828a5..c0d806f89 100644 --- a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc +++ b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc @@ -174,8 +174,7 @@ class Server : public TaskLib { float user_score = blob_info.user_score_; // Final scores if (!blob_info.flags_.Any(HERMES_USER_SCORE_STATIONARY)) { - // return user_score * access_score; - return user_score; + return user_score * access_score; } else { return std::max(access_score, user_score); } From e217ae73ed0fadccc86ea9bd4e553ee54d73e89d Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Tue, 7 Nov 2023 17:43:30 -0600 Subject: [PATCH 027/114] Print tgt score --- tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc index c0d806f89..7212c2c2b 100644 --- a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc +++ b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc @@ -101,8 +101,8 @@ class Server : public TaskLib { } for (bdev::Client &client : targets_) { target_map_.emplace(client.id_, &client); - HILOG(kInfo, "(node {}) Target {} has bw {}", HRUN_CLIENT->node_id_, - client.id_, client.bandwidth_); + HILOG(kInfo, "(node {}) Target {} has bw {} and score {}", HRUN_CLIENT->node_id_, + client.id_, client.bandwidth_, client.bw_score_); } fallback_target_ = &targets_.back(); blob_mdm_.Init(id_); @@ -224,7 +224,7 @@ class Server : public TaskLib { } if (cmp_tgt.id_ != target.id_) { HILOG(kInfo, "Should move from {} to {} with score {} and tier score {}", - target.id_, cmp_tgt.score_, score, cmp_tgt.score_); + target.id_, cmp_tgt.id_, score, cmp_tgt.score_); return true; } } From f54fa265ae93102ff235a4ef7be9749e80af2294 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Tue, 7 Nov 2023 17:46:18 -0600 Subject: [PATCH 028/114] Set target score --- tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc index 7212c2c2b..d234405b3 100644 --- a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc +++ b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc @@ -98,6 +98,7 @@ class Server : public TaskLib { float bw_min = targets_.back().bandwidth_; for (bdev::Client &client : targets_) { client.bw_score_ = (client.bandwidth_ - bw_min) / (bw_max - bw_min); + client.score_ = client.bw_score_; } for (bdev::Client &client : targets_) { target_map_.emplace(client.id_, &client); From 689264ecd56fb2ba9367a34c7378e61af02226ea Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Tue, 7 Nov 2023 17:52:55 -0600 Subject: [PATCH 029/114] Aggressive flushing --- .../include/hermes_blob_mdm/hermes_blob_mdm_tasks.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h b/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h index 16691a8c3..90fb30e6d 100644 --- a/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h +++ 
b/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h @@ -1167,7 +1167,7 @@ struct FlushDataTask : public Task, TaskFlags { TASK_LONG_RUNNING | TASK_COROUTINE | TASK_REMOTE_DEBUG_MARK); - SetPeriodSec(1); // TODO(llogan): don't hardcode this + SetPeriodMs(5); // TODO(llogan): don't hardcode this domain_id_ = DomainId::GetLocal(); } From 281df63b44fd97275d4e36b49fd2db71446bb328 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Tue, 7 Nov 2023 18:11:18 -0600 Subject: [PATCH 030/114] Make dpe not place low-prio data --- include/hermes/dpe/minimize_io_time.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/hermes/dpe/minimize_io_time.h b/include/hermes/dpe/minimize_io_time.h index ecdc6da3c..2cb6259ec 100644 --- a/include/hermes/dpe/minimize_io_time.h +++ b/include/hermes/dpe/minimize_io_time.h @@ -43,8 +43,8 @@ class MinimizeIoTime : public Dpe { // targets that can't fit the ENTIRE blob size_t rem_cap = target.GetRemCap(); // TODO(llogan): add back - // if (target.score_ > score || rem_cap < blob_size) { - if (rem_cap < blob_size) { + if (target.score_ > score || rem_cap < blob_size) { + // if (rem_cap < blob_size) { // TODO(llogan): add other considerations of this Dpe continue; } From 987543eb45b948078bbdf075cf848713a1317612 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Tue, 7 Nov 2023 18:13:58 -0600 Subject: [PATCH 031/114] Is it the same blobs? --- tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc index d234405b3..e5d7a4a7a 100644 --- a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc +++ b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc @@ -224,8 +224,8 @@ class Server : public TaskLib { continue; } if (cmp_tgt.id_ != target.id_) { - HILOG(kInfo, "Should move from {} to {} with score {} and tier score {}", - target.id_, cmp_tgt.id_, score, cmp_tgt.score_); + HILOG(kInfo, "Should move {} from {} to {} with score {} and tier score {}", + blob_info.blob_id_, target.id_, cmp_tgt.id_, score, cmp_tgt.score_); return true; } } From f861df0f4c7cdcf2fce8d9ec62e93c971c4edaac Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Tue, 7 Nov 2023 18:22:03 -0600 Subject: [PATCH 032/114] Fix score before reorganizing and make score and user score the same --- tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc index e5d7a4a7a..50fd54343 100644 --- a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc +++ b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc @@ -256,6 +256,7 @@ class Server : public TaskLib { BlobInfo &blob_info = it.second; // Update blob scores float new_score = MakeScore(blob_info, now); + blob_info.score_ = new_score; if (ShouldReorganize(blob_info, new_score, task->task_node_)) { blob_mdm_.AsyncReorganizeBlob(task->task_node_ + 1, blob_info.tag_id_, @@ -263,7 +264,6 @@ class Server : public TaskLib { new_score, 0, false); } blob_info.access_freq_ = 0; - blob_info.score_ = new_score; // Flush data size_t mod_count = blob_info.mod_count_; @@ -829,8 +829,7 @@ class Server : public TaskLib { BlobInfo &blob_info = it->second; if (task->is_user_score_) { blob_info.user_score_ = task->score_; - blob_info.score_ = std::max(blob_info.user_score_, - blob_info.score_); + blob_info.score_ = blob_info.user_score_; } else { blob_info.score_ = 
task->score_; } From 425adcef7ea42e4809cd5b61085ef51614ce8fc2 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Tue, 7 Nov 2023 23:27:36 -0600 Subject: [PATCH 033/114] Don't reorganize too many blobs asynchronously --- .../include/hermes_blob_mdm/hermes_blob_mdm.h | 5 +++-- .../include/hermes_blob_mdm/hermes_blob_mdm_tasks.h | 5 +++-- tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc | 12 ++++++++---- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm.h b/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm.h index 3443a511e..ba38e0d83 100644 --- a/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm.h +++ b/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm.h @@ -187,11 +187,12 @@ class Client : public TaskLibClient { const BlobId &blob_id, float score, u32 node_id, - bool user_score) { + bool user_score, + u32 task_flags = TASK_LOW_LATENCY | TASK_FIRE_AND_FORGET) { // HILOG(kDebug, "Beginning REORGANIZE (task_node={})", task_node); HRUN_CLIENT->ConstructTask( task, task_node, DomainId::GetNode(blob_id.node_id_), id_, - tag_id, blob_id, score, node_id, user_score); + tag_id, blob_id, score, node_id, user_score, task_flags); } HRUN_TASK_NODE_PUSH_ROOT(ReorganizeBlob); diff --git a/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h b/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h index 90fb30e6d..7a923d5b1 100644 --- a/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h +++ b/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h @@ -1104,14 +1104,15 @@ struct ReorganizeBlobTask : public Task, TaskFlags { const BlobId &blob_id, float score, u32 node_id, - bool is_user_score) : Task(alloc) { + bool is_user_score, + u32 task_flags = TASK_LOW_LATENCY | TASK_FIRE_AND_FORGET) : Task(alloc) { // Initialize task task_node_ = task_node; lane_hash_ = blob_id.hash_; prio_ = TaskPrio::kLowLatency; task_state_ = state_id; method_ = Method::kReorganizeBlob; - task_flags_.SetBits(TASK_LOW_LATENCY | TASK_FIRE_AND_FORGET); + task_flags_.SetBits(task_flags); domain_id_ = domain_id; // Custom params diff --git a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc index 50fd54343..7dc8c6ad3 100644 --- a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc +++ b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc @@ -258,10 +258,14 @@ class Server : public TaskLib { float new_score = MakeScore(blob_info, now); blob_info.score_ = new_score; if (ShouldReorganize(blob_info, new_score, task->task_node_)) { - blob_mdm_.AsyncReorganizeBlob(task->task_node_ + 1, - blob_info.tag_id_, - blob_info.blob_id_, - new_score, 0, false); + LPointer reorg_task = + blob_mdm_.AsyncReorganizeBlob(task->task_node_ + 1, + blob_info.tag_id_, + blob_info.blob_id_, + new_score, 0, false, + TASK_LOW_LATENCY); + reorg_task->Wait(task); + HRUN_CLIENT->DelTask(reorg_task); } blob_info.access_freq_ = 0; From 3e3e7c33e91de4fd78353048af5fa7f08d9f8b08 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Tue, 7 Nov 2023 23:33:10 -0600 Subject: [PATCH 034/114] Pass blob score --- tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc index 7dc8c6ad3..5f8390bc0 100644 --- a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc +++ b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc @@ -358,6 +358,7 @@ class Server : public TaskLib { 
if (size_diff > 0) { Context ctx; auto *dpe = DpeFactory::Get(ctx.dpe_); + ctx.blob_score_ = task->score_; dpe->Placement({size_diff}, targets_, ctx, schema_vec); } From d427504aa9074ffe02a235aef9b1c9d4a7b2823e Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Tue, 7 Nov 2023 23:36:17 -0600 Subject: [PATCH 035/114] Print better --- tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc index 5f8390bc0..82de59aa9 100644 --- a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc +++ b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc @@ -220,6 +220,8 @@ class Server : public TaskLib { return false; } for (const bdev::Client &cmp_tgt: targets_) { + HILOG(kInfo, "Move {} from {} to {} with score {} and tier score {}?", + blob_info.blob_id_, target.id_, cmp_tgt.id_, score, cmp_tgt.score_); if (cmp_tgt.score_ > score + .05) { continue; } From cf9db686fd0a06ca8ef6df89f9c9b5c34590f59a Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Tue, 7 Nov 2023 23:41:05 -0600 Subject: [PATCH 036/114] Return false --- tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc index 82de59aa9..e0dfbb732 100644 --- a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc +++ b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc @@ -230,6 +230,7 @@ class Server : public TaskLib { blob_info.blob_id_, target.id_, cmp_tgt.id_, score, cmp_tgt.score_); return true; } + return false; } // if (rem_cap <= max_cap * target.borg_min_thresh_) { // if (precentile_lt > 90) { From 0a9f9073fc20b553c0ad59ca414f0cfbf84d6eb6 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Tue, 7 Nov 2023 23:43:37 -0600 Subject: [PATCH 037/114] Only print on reorg --- tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc index e0dfbb732..086dcd891 100644 --- a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc +++ b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc @@ -220,8 +220,6 @@ class Server : public TaskLib { return false; } for (const bdev::Client &cmp_tgt: targets_) { - HILOG(kInfo, "Move {} from {} to {} with score {} and tier score {}?", - blob_info.blob_id_, target.id_, cmp_tgt.id_, score, cmp_tgt.score_); if (cmp_tgt.score_ > score + .05) { continue; } From 48e78ea9efcaa5fae839d3b00c3067bca2eba022 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Wed, 8 Nov 2023 00:22:47 -0600 Subject: [PATCH 038/114] More confined reorganization --- tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc | 57 +++++++++++--------- 1 file changed, 31 insertions(+), 26 deletions(-) diff --git a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc index 086dcd891..2e3a2cfaa 100644 --- a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc +++ b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc @@ -180,6 +180,14 @@ class Server : public TaskLib { return std::max(access_score, user_score); } } + const bdev::Client& FindNearestTarget(float score) { + for (const bdev::Client &cmp_tgt: targets_) { + if (cmp_tgt.score_ > score + .05) { + continue; + } + return cmp_tgt; + } + } /** Check if blob should be reorganized */ template @@ -194,17 +202,11 @@ class Server : public TaskLib { u32 precentile_lt = hist.GetPercentileLT(score); size_t rem_cap = target.monitor_task_->rem_cap_; size_t 
max_cap = target.max_cap_; + float borg_cap_min = target.borg_min_thresh_; + float borg_cap_max = target.borg_max_thresh_; // float min_score = hist.GetQuantile(0); // Update the target score target.score_ = target.bw_score_; -// if (rem_cap < max_cap * .5) { -// // Enough capacity has been used to make scoring important. -// target.score_ = target.bw_score_; -// } else { -// // There's a lot of capacity left. -// // Make DPE start placing data here. -// target.score_ = 0; -// } // Update blob score if constexpr(UPDATE_SCORE) { u32 bin_orig = hist.GetBin(blob_info.score_); @@ -217,29 +219,32 @@ class Server : public TaskLib { // Determine if the blob should be reorganized // Get the target with minimum difference in score to this blob if (abs(target.score_ - score) < .1) { - return false; + continue; } - for (const bdev::Client &cmp_tgt: targets_) { - if (cmp_tgt.score_ > score + .05) { - continue; + const bdev::Client &cmp_tgt = FindNearestTarget(score); + if (cmp_tgt.id_ == target.id_) { + continue; + } + if (cmp_tgt.score_ <= target.score_) { + // Demote if we have sufficiently low capacity + if (rem_cap < max_cap * borg_cap_min) { + HILOG(kInfo, "Demoting blob {} of score {} from tgt={} tgt_score={} to tgt={} tgt_score={}", + blob_info.blob_id_, blob_info.score_, + target.id_, target.score_, + cmp_tgt.id_, cmp_tgt.score_); + return true; } - if (cmp_tgt.id_ != target.id_) { - HILOG(kInfo, "Should move {} from {} to {} with score {} and tier score {}", - blob_info.blob_id_, target.id_, cmp_tgt.id_, score, cmp_tgt.score_); + } else { + // Promote if the guy above us has sufficiently high capacity + float cmp_rem_cap = cmp_tgt.monitor_task_->rem_cap_; + if (cmp_rem_cap > blob_info.blob_size_) { + HILOG(kInfo, "Promoting blob {} of score {} from tgt={} tgt_score={} to tgt={} tgt_score={}", + blob_info.blob_id_, blob_info.score_, + target.id_, target.score_, + cmp_tgt.id_, cmp_tgt.score_); return true; } - return false; } -// if (rem_cap <= max_cap * target.borg_min_thresh_) { -// if (precentile_lt > 90) { -// HILOG(kInfo, "Should reorganize based on max"); -// return true; -// } -// if (percentile < 10) { -// HILOG(kInfo, "Should reorganize based on rem"); -// return true; -// } -// } } return false; } From 1ab5a1c7409673f1c7849768aebd7e861767a0f7 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Wed, 8 Nov 2023 00:24:11 -0600 Subject: [PATCH 039/114] Return targets.back() --- tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc index 2e3a2cfaa..b9baebe6c 100644 --- a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc +++ b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc @@ -187,6 +187,7 @@ class Server : public TaskLib { } return cmp_tgt; } + return targets_.back(); } /** Check if blob should be reorganized */ From 8d9291b09f879acfa7ab71229fb7093e3f64a122 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Wed, 8 Nov 2023 02:26:29 -0600 Subject: [PATCH 040/114] Try using main alloc again --- hrun/include/hrun/api/hrun_client.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hrun/include/hrun/api/hrun_client.h b/hrun/include/hrun/api/hrun_client.h index d3047fa3a..75ab87e42 100644 --- a/hrun/include/hrun/api/hrun_client.h +++ b/hrun/include/hrun/api/hrun_client.h @@ -250,7 +250,7 @@ class Client : public ConfigurationManager { if constexpr(IN_CLIENT) { p = data_alloc_->AllocateLocalPtr(size); } else { - p = 
data_alloc_->AllocateLocalPtr(size); + p = main_alloc_->AllocateLocalPtr(size); } } catch (...) { p.shm_.SetNull(); @@ -275,7 +275,7 @@ class Client : public ConfigurationManager { if constexpr(IN_CLIENT) { p = data_alloc_->AllocateLocalPtr(size); } else { - p = data_alloc_->AllocateLocalPtr(size); + p = main_alloc_->AllocateLocalPtr(size); } } catch (...) { p.shm_.SetNull(); From d54f7e00c6c6140abfbea3c100a7e8fe8891bc38 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Wed, 8 Nov 2023 02:44:03 -0600 Subject: [PATCH 041/114] Print heap size after free --- hrun/include/hrun/api/hrun_client.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hrun/include/hrun/api/hrun_client.h b/hrun/include/hrun/api/hrun_client.h index 75ab87e42..87835cd8f 100644 --- a/hrun/include/hrun/api/hrun_client.h +++ b/hrun/include/hrun/api/hrun_client.h @@ -258,7 +258,7 @@ class Client : public ConfigurationManager { if (!p.shm_.IsNull()) { break; } - HILOG(kInfo, "{} Could not allocate buffer of size {} (2)?", THREAD_MODEL, size); + // HILOG(kInfo, "{} Could not allocate buffer of size {} (2)?", THREAD_MODEL, size); Yield(yield_task); } return p; @@ -284,7 +284,7 @@ class Client : public ConfigurationManager { break; } Yield(); - HILOG(kInfo, "{} Could not allocate buffer of size {} (1)?", THREAD_MODEL, size); + // HILOG(kInfo, "{} Could not allocate buffer of size {} (1)?", THREAD_MODEL, size); } return p; } @@ -292,16 +292,16 @@ class Client : public ConfigurationManager { /** Free a buffer */ HSHM_ALWAYS_INLINE void FreeBuffer(hipc::Pointer &p) { - // HILOG(kInfo, "Heap size: {}", data_alloc_->GetCurrentlyAllocatedSize()); auto alloc = HERMES_MEMORY_MANAGER->GetAllocator(p.allocator_id_); + HILOG(kInfo, "Heap size: {}", data_alloc_->GetCurrentlyAllocatedSize()); alloc->Free(p); } /** Free a buffer */ HSHM_ALWAYS_INLINE void FreeBuffer(LPointer &p) { - // HILOG(kInfo, "Heap size: {}", data_alloc_->GetCurrentlyAllocatedSize()); auto alloc = HERMES_MEMORY_MANAGER->GetAllocator(p.shm_.allocator_id_); + HILOG(kInfo, "Heap size: {}", data_alloc_->GetCurrentlyAllocatedSize()); alloc->FreeLocalPtr(p); } From 0cd383564839771f3c8187e86890e2fcddb80e17 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Wed, 8 Nov 2023 02:58:10 -0600 Subject: [PATCH 042/114] Include put free --- .../include/hermes_blob_mdm/hermes_blob_mdm_tasks.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h b/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h index 7a923d5b1..22975b586 100644 --- a/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h +++ b/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h @@ -289,10 +289,12 @@ struct PutBlobTask : public Task, TaskFlags data_ = data; score_ = score; flags_ = bitfield32_t(flags | ctx.flags_.bits_); + HILOG(kInfo, "Creating PUT of size {}", data_size_); } /** Destructor */ ~PutBlobTask() { + HILOG(kInfo, "Freeing PUT of size {}", data_size_); HSHM_DESTROY_AR(blob_name_); if (IsDataOwner()) { HRUN_CLIENT->FreeBuffer(data_); From 3dab8f2d30df866fdb0c8e38e099c4caeaa51b4f Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Wed, 8 Nov 2023 03:01:56 -0600 Subject: [PATCH 043/114] Include put free --- .../include/hermes_blob_mdm/hermes_blob_mdm_tasks.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h b/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h index 
22975b586..3d2471f1e 100644 --- a/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h +++ b/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h @@ -297,6 +297,7 @@ struct PutBlobTask : public Task, TaskFlags HILOG(kInfo, "Freeing PUT of size {}", data_size_); HSHM_DESTROY_AR(blob_name_); if (IsDataOwner()) { + HILOG(kInfo, "Actually freeing PUT of size {}", data_size_); HRUN_CLIENT->FreeBuffer(data_); } } From 61a752a9bbcf6a0018e5393a39f981a99bf4a2ee Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Wed, 8 Nov 2023 03:10:36 -0600 Subject: [PATCH 044/114] Put the node --- .../include/hermes_blob_mdm/hermes_blob_mdm_tasks.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h b/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h index 3d2471f1e..0cbf0dab4 100644 --- a/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h +++ b/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h @@ -289,15 +289,14 @@ struct PutBlobTask : public Task, TaskFlags data_ = data; score_ = score; flags_ = bitfield32_t(flags | ctx.flags_.bits_); - HILOG(kInfo, "Creating PUT of size {}", data_size_); + HILOG(kInfo, "Creating PUT {} of size {}", task_node_, data_size_); } /** Destructor */ ~PutBlobTask() { - HILOG(kInfo, "Freeing PUT of size {}", data_size_); HSHM_DESTROY_AR(blob_name_); if (IsDataOwner()) { - HILOG(kInfo, "Actually freeing PUT of size {}", data_size_); + HILOG(kInfo, "Actually freeing PUT {} of size {}", task_node_, data_size_); HRUN_CLIENT->FreeBuffer(data_); } } From 61141e7ff874eb7bed520666e416e14e3ac8ba16 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Wed, 8 Nov 2023 05:32:35 -0600 Subject: [PATCH 045/114] Try using flush --- hrun/include/hrun/api/hrun_client.h | 16 ++++++++--- .../include/hrun_admin/hrun_admin.h | 20 ++++++++++++++ include/hermes/bucket.h | 9 ++++--- test/unit/hermes/test_bucket.cc | 27 +++++++++++++++++++ 4 files changed, 66 insertions(+), 6 deletions(-) diff --git a/hrun/include/hrun/api/hrun_client.h b/hrun/include/hrun/api/hrun_client.h index 87835cd8f..d8092e65f 100644 --- a/hrun/include/hrun/api/hrun_client.h +++ b/hrun/include/hrun/api/hrun_client.h @@ -268,16 +268,22 @@ class Client : public ConfigurationManager { template HSHM_ALWAYS_INLINE LPointer AllocateBuffer(size_t size) { - // HILOG(kInfo, "{} Heap size: {}", THREAD_MODEL, data_alloc_->GetCurrentlyAllocatedSize()); + // LPointer p; while (true) { try { if constexpr(IN_CLIENT) { + auto id = data_alloc_->GetId(); + HILOG(kInfo, "{} Alloc {}/{} Heap size: {}", THREAD_MODEL, + id.bits_.major_, + id.bits_.minor_, + data_alloc_->GetCurrentlyAllocatedSize()); p = data_alloc_->AllocateLocalPtr(size); } else { p = main_alloc_->AllocateLocalPtr(size); } - } catch (...) 
{ + } catch (hshm::Error &e) { + e.print(); p.shm_.SetNull(); } if (!p.shm_.IsNull()) { @@ -293,7 +299,10 @@ class Client : public ConfigurationManager { HSHM_ALWAYS_INLINE void FreeBuffer(hipc::Pointer &p) { auto alloc = HERMES_MEMORY_MANAGER->GetAllocator(p.allocator_id_); - HILOG(kInfo, "Heap size: {}", data_alloc_->GetCurrentlyAllocatedSize()); + HILOG(kInfo, "Heap size for {}/{}: {}", + p.allocator_id_.bits_.major_, + p.allocator_id_.bits_.minor_, + data_alloc_->GetCurrentlyAllocatedSize()); alloc->Free(p); } @@ -303,6 +312,7 @@ class Client : public ConfigurationManager { auto alloc = HERMES_MEMORY_MANAGER->GetAllocator(p.shm_.allocator_id_); HILOG(kInfo, "Heap size: {}", data_alloc_->GetCurrentlyAllocatedSize()); alloc->FreeLocalPtr(p); + HILOG(kInfo, "Heap size after: {}", data_alloc_->GetCurrentlyAllocatedSize()); } /** Convert pointer to char* */ diff --git a/hrun/tasks_required/hrun_admin/include/hrun_admin/hrun_admin.h b/hrun/tasks_required/hrun_admin/include/hrun_admin/hrun_admin.h index 0762ba70f..8f5227112 100644 --- a/hrun/tasks_required/hrun_admin/include/hrun_admin/hrun_admin.h +++ b/hrun/tasks_required/hrun_admin/include/hrun_admin/hrun_admin.h @@ -226,6 +226,26 @@ class Client : public TaskLibClient { HRUN_CLIENT->DelTask(task); } HRUN_TASK_NODE_ADMIN_ROOT(Flush); + + /** Allocate a buffer */ + HSHM_ALWAYS_INLINE + LPointer AllocateBuffer(size_t size) { + LPointer p; + while (true) { + try { + p = HRUN_CLIENT->data_alloc_->AllocateLocalPtr(size); + } catch (hshm::Error &e) { + e.print(); + p.shm_.SetNull(); + } + if (!p.shm_.IsNull()) { + break; + } + FlushRoot(DomainId::GetLocal()); + // HILOG(kInfo, "{} Could not allocate buffer of size {} (1)?", THREAD_MODEL, size); + } + return p; + } }; } // namespace hrun::Admin diff --git a/include/hermes/bucket.h b/include/hermes/bucket.h index 43d89c085..13a893535 100644 --- a/include/hermes/bucket.h +++ b/include/hermes/bucket.h @@ -226,7 +226,8 @@ class Bucket { bitfield32_t flags, task_flags( TASK_FIRE_AND_FORGET | TASK_DATA_OWNER | TASK_LOW_LATENCY); // Copy data to shared memory - LPointer p = HRUN_CLIENT->AllocateBuffer(blob.size()); + // LPointer p = HRUN_CLIENT->AllocateBuffer(blob.size()); + LPointer p = HRUN_ADMIN->AllocateBuffer(blob.size()); char *data = p.ptr_; memcpy(data, blob.data(), blob.size()); // Put to shared memory @@ -378,7 +379,8 @@ class Bucket { * Append \a blob_name Blob into the bucket (fully asynchronous) * */ void Append(const Blob &blob, size_t page_size, Context &ctx) { - LPointer p = HRUN_CLIENT->AllocateBuffer(blob.size()); + // LPointer p = HRUN_CLIENT->AllocateBuffer(blob.size()); + LPointer p = HRUN_ADMIN->AllocateBuffer(blob.size()); char *data = p.ptr_; memcpy(data, blob.data(), blob.size()); bkt_mdm_->AppendBlobRoot( @@ -445,7 +447,8 @@ class Bucket { } // Get from shared memory size_t data_size = blob.size(); - LPointer data_p = HRUN_CLIENT->AllocateBuffer(data_size); + // LPointer data_p = HRUN_CLIENT->AllocateBuffer(data_size); + LPointer data_p = HRUN_ADMIN->AllocateBuffer(blob.size()); LPointer> push_task; push_task = blob_mdm_->AsyncGetBlobRoot(id_, hshm::to_charbuf(blob_name), blob_id, blob_off, diff --git a/test/unit/hermes/test_bucket.cc b/test/unit/hermes/test_bucket.cc index 2cd437d24..71449d3a6 100644 --- a/test/unit/hermes/test_bucket.cc +++ b/test/unit/hermes/test_bucket.cc @@ -93,6 +93,33 @@ TEST_CASE("TestHermesPut") { MPI_Barrier(MPI_COMM_WORLD); } +TEST_CASE("TestHermesAsyncPut") { + int rank, nprocs; + MPI_Barrier(MPI_COMM_WORLD); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); 
+ MPI_Comm_size(MPI_COMM_WORLD, &nprocs); + + // Initialize Hermes on all nodes + HERMES->ClientInit(); + + // Create a bucket + hermes::Context ctx; + hermes::Bucket bkt("hello"); + + size_t count_per_proc = 256; + size_t off = rank * count_per_proc; + size_t proc_count = off + count_per_proc; + for (size_t i = off; i < proc_count; ++i) { + HILOG(kInfo, "Iteration: {}", i); + // Put a blob + hermes::Blob blob(MEGABYTES(1)); + memset(blob.data(), i % 256, blob.size()); + bkt.AsyncPut(std::to_string(i), blob, ctx); + } + MPI_Barrier(MPI_COMM_WORLD); + HRUN_ADMIN->FlushRoot(DomainId::GetGlobal()); +} + TEST_CASE("TestHermesPutGet") { int rank, nprocs; MPI_Barrier(MPI_COMM_WORLD); From 3c20f892475fc454d44ea3764b1ac4aee914bc37 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Wed, 8 Nov 2023 05:34:38 -0600 Subject: [PATCH 046/114] remove needless print --- hrun/include/hrun/api/hrun_client.h | 1 - hrun/tasks_required/hrun_admin/include/hrun_admin/hrun_admin.h | 1 - 2 files changed, 2 deletions(-) diff --git a/hrun/include/hrun/api/hrun_client.h b/hrun/include/hrun/api/hrun_client.h index d8092e65f..ca5613673 100644 --- a/hrun/include/hrun/api/hrun_client.h +++ b/hrun/include/hrun/api/hrun_client.h @@ -283,7 +283,6 @@ class Client : public ConfigurationManager { p = main_alloc_->AllocateLocalPtr(size); } } catch (hshm::Error &e) { - e.print(); p.shm_.SetNull(); } if (!p.shm_.IsNull()) { diff --git a/hrun/tasks_required/hrun_admin/include/hrun_admin/hrun_admin.h b/hrun/tasks_required/hrun_admin/include/hrun_admin/hrun_admin.h index 8f5227112..4aa3e1797 100644 --- a/hrun/tasks_required/hrun_admin/include/hrun_admin/hrun_admin.h +++ b/hrun/tasks_required/hrun_admin/include/hrun_admin/hrun_admin.h @@ -235,7 +235,6 @@ class Client : public TaskLibClient { try { p = HRUN_CLIENT->data_alloc_->AllocateLocalPtr(size); } catch (hshm::Error &e) { - e.print(); p.shm_.SetNull(); } if (!p.shm_.IsNull()) { From e389af5e9454bd1e303947a0347463c16e78fe0d Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Wed, 8 Nov 2023 06:13:49 -0600 Subject: [PATCH 047/114] Add libaio as a dependency --- CMakeLists.txt | 8 +++ tasks/bdev/include/bdev/bdev_tasks.h | 5 ++ tasks/posix_bdev/src/CMakeLists.txt | 2 +- tasks/posix_bdev/src/posix_bdev.cc | 93 +++++++++++++++++++++++++--- 4 files changed, 99 insertions(+), 9 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 63f44b6ae..a73181b54 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -86,6 +86,14 @@ if(PkgConfig) message(STATUS "found pkg config") endif() +# LIBAIO +find_library(LIBAIO_LIBRARY NAMES aio) +if(LIBAIO_LIBRARY) + message(STATUS "found libaio at ${LIBAIO_LIBRARY}") +else() + message(FATAL_ERROR "Could not find libaio, please install libaio-dev.") +endif() + # Zeromq #pkg_check_modules(ZMQ REQUIRED libzmq) #include_directories(${ZMQ_INCLUDE_DIRS}) diff --git a/tasks/bdev/include/bdev/bdev_tasks.h b/tasks/bdev/include/bdev/bdev_tasks.h index 28281c5f9..a96c54810 100644 --- a/tasks/bdev/include/bdev/bdev_tasks.h +++ b/tasks/bdev/include/bdev/bdev_tasks.h @@ -5,6 +5,7 @@ #ifndef HRUN_TASKS_BDEV_INCLUDE_BDEV_BDEV_TASKS_H_ #define HRUN_TASKS_BDEV_INCLUDE_BDEV_BDEV_TASKS_H_ +#include #include "hrun/api/hrun_client.h" #include "hrun/task_registry/task_lib.h" #include "hrun_admin/hrun_admin.h" @@ -164,6 +165,8 @@ struct WriteTask : public Task, TaskFlags { IN const char *buf_; /**< Data in memory */ IN size_t disk_off_; /**< Offset on disk */ IN size_t size_; /**< Size in buf */ + TEMP int phase_ = 0; + TEMP io_context_t ctx_ = 0; /** SHM default 
constructor */ HSHM_ALWAYS_INLINE explicit @@ -207,6 +210,8 @@ struct ReadTask : public Task, TaskFlags { IN char *buf_; /**< Data in memory */ IN size_t disk_off_; /**< Offset on disk */ IN size_t size_; /**< Size in disk buf */ + TEMP int phase_ = 0; + TEMP io_context_t ctx_ = 0; /** SHM default constructor */ HSHM_ALWAYS_INLINE explicit diff --git a/tasks/posix_bdev/src/CMakeLists.txt b/tasks/posix_bdev/src/CMakeLists.txt index 75695cf98..0d546d927 100644 --- a/tasks/posix_bdev/src/CMakeLists.txt +++ b/tasks/posix_bdev/src/CMakeLists.txt @@ -4,7 +4,7 @@ add_library(posix_bdev SHARED posix_bdev.cc) add_dependencies(posix_bdev ${Hermes_RUNTIME_DEPS}) -target_link_libraries(posix_bdev ${Hermes_RUNTIME_LIBRARIES}) +target_link_libraries(posix_bdev ${Hermes_RUNTIME_LIBRARIES} ${LIBAIO_LIBRARY}) #------------------------------------------------------------------------------ # Install Small Message Task Library diff --git a/tasks/posix_bdev/src/posix_bdev.cc b/tasks/posix_bdev/src/posix_bdev.cc index 393762563..5eb11533c 100644 --- a/tasks/posix_bdev/src/posix_bdev.cc +++ b/tasks/posix_bdev/src/posix_bdev.cc @@ -11,6 +11,7 @@ #include #include #include +#include namespace hermes::posix_bdev { @@ -74,11 +75,49 @@ class Server : public TaskLib, public bdev::Server { /** Write to bdev */ void Write(WriteTask *task, RunContext &rctx) { HILOG(kDebug, "Writing {} bytes to {}", task->size_, path_); - ssize_t count = pwrite(fd_, task->buf_, task->size_, (off_t)task->disk_off_); - if (count != task->size_) { - HELOG(kError, "BORG: wrote {} bytes, but expected {}: {}", - count, task->size_, strerror(errno)); + switch (task->phase_) { + case 0: { + int ret = io_setup(1, &task->ctx_); + if (ret < 0) { + perror("io_setup"); + HELOG(kError, "Libaio failed for write (1)"); + task->SetModuleComplete(); + return; + } + struct iocb xfer_iocb; + struct iocb *iocb_list[1]; + io_prep_pwrite(&xfer_iocb, fd_, (void*)task->buf_, task->size_, task->disk_off_); + xfer_iocb.data = (void*) task->buf_; + iocb_list[0] = &xfer_iocb; + ret = io_submit(task->ctx_, 1, iocb_list); + if (ret != 1) { + perror("io_submit"); + HELOG(kError, "Libaio failed for write (2)"); + task->SetModuleComplete(); + return; + } + task->phase_ = 1; + } + case 1: { + struct io_event events[1]; + struct timespec timeout{0, 0}; + int ret = io_getevents(task->ctx_, 1, 1, events, &timeout); + if (ret == -EAGAIN) { + return; + } else if (ret < 0) { + perror("io_getevents"); + HELOG(kError, "Libaio failed for write (3)"); + task->SetModuleComplete(); + return; + } + io_destroy(task->ctx_); + } } +// ssize_t count = pwrite(fd_, task->buf_, task->size_, (off_t)task->disk_off_); +// if (count != task->size_) { +// HELOG(kError, "BORG: wrote {} bytes, but expected {}: {}", +// count, task->size_, strerror(errno)); +// } task->SetModuleComplete(); } void MonitorWrite(u32 mode, WriteTask *task, RunContext &rctx) { @@ -87,11 +126,49 @@ class Server : public TaskLib, public bdev::Server { /** Read from bdev */ void Read(ReadTask *task, RunContext &rctx) { HILOG(kDebug, "Reading {} bytes from {}", task->size_, path_); - ssize_t count = pread(fd_, task->buf_, task->size_, (off_t)task->disk_off_); - if (count != task->size_) { - HELOG(kError, "BORG: read {} bytes, but expected {}", - count, task->size_); + switch (task->phase_) { + case 0: { + int ret = io_setup(1, &task->ctx_); + if (ret < 0) { + perror("io_setup"); + HELOG(kError, "Libaio failed for write (1)"); + task->SetModuleComplete(); + return; + } + struct iocb xfer_iocb; + struct iocb *iocb_list[1]; + 
io_prep_pread(&xfer_iocb, fd_, (void*)task->buf_, task->size_, task->disk_off_); + xfer_iocb.data = (void*) task->buf_; + iocb_list[0] = &xfer_iocb; + ret = io_submit(task->ctx_, 1, iocb_list); + if (ret != 1) { + perror("io_submit"); + HELOG(kError, "Libaio failed for write (2)"); + task->SetModuleComplete(); + return; + } + task->phase_ = 1; + } + case 1: { + struct io_event events[1]; + struct timespec timeout{0, 0}; + int ret = io_getevents(task->ctx_, 1, 1, events, &timeout); + if (ret == -EAGAIN) { + return; + } else if (ret < 0) { + perror("io_getevents"); + HELOG(kError, "Libaio failed for write (3)"); + task->SetModuleComplete(); + return; + } + io_destroy(task->ctx_); + } } +// ssize_t count = pread(fd_, task->buf_, task->size_, (off_t)task->disk_off_); +// if (count != task->size_) { +// HELOG(kError, "BORG: read {} bytes, but expected {}", +// count, task->size_); +// } task->SetModuleComplete(); } void MonitorRead(u32 mode, ReadTask *task, RunContext &rctx) { From e43f9a42730768ca56df74a1079d6e34cc6fc812 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Wed, 8 Nov 2023 06:15:20 -0600 Subject: [PATCH 048/114] Add libaio ot spack --- ci/hermes/packages/hermes/package.py | 1 + ci/hermes/packages/hermes_shm/package.py | 1 + 2 files changed, 2 insertions(+) diff --git a/ci/hermes/packages/hermes/package.py b/ci/hermes/packages/hermes/package.py index d0404e356..3106d7f48 100644 --- a/ci/hermes/packages/hermes/package.py +++ b/ci/hermes/packages/hermes/package.py @@ -44,6 +44,7 @@ class Hermes(CMakePackage): depends_on('mpi') depends_on('cereal') depends_on('yaml-cpp') + depends_on('libaio') depends_on('doxygen@1.9.3') depends_on('boost@1.7: +context +fiber +filesystem +system +atomic +chrono +serialization +signals +pic +regex') depends_on('libfabric fabrics=sockets,tcp,udp,verbs', diff --git a/ci/hermes/packages/hermes_shm/package.py b/ci/hermes/packages/hermes_shm/package.py index 5af8c607f..bd19884ba 100644 --- a/ci/hermes/packages/hermes_shm/package.py +++ b/ci/hermes/packages/hermes_shm/package.py @@ -21,6 +21,7 @@ class HermesShm(CMakePackage): depends_on('mpi') depends_on('cereal') depends_on('yaml-cpp') + depends_on('libaio') depends_on('doxygen@1.9.3') depends_on('boost@1.7: +context +fiber +filesystem +system +atomic +chrono +serialization +signals +pic +regex') depends_on('libfabric fabrics=sockets,tcp,udp,verbs', From 1d0effcaff1cbe654c2e8db1bc48d8aaf889b3b3 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Wed, 8 Nov 2023 12:05:34 -0600 Subject: [PATCH 049/114] Add custom libaio package --- ci/hermes/packages/libaio/package.py | 57 ++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 ci/hermes/packages/libaio/package.py diff --git a/ci/hermes/packages/libaio/package.py b/ci/hermes/packages/libaio/package.py new file mode 100644 index 000000000..c566ab046 --- /dev/null +++ b/ci/hermes/packages/libaio/package.py @@ -0,0 +1,57 @@ +# Copyright 2013-2023 Lawrence Livermore National Security, LLC and other +# Spack Project Developers. See the top-level COPYRIGHT file for details. +# +# SPDX-License-Identifier: (Apache-2.0 OR MIT) + + +from spack.package import * + + +class Libaio(MakefilePackage): + """Linux native Asynchronous I/O interface library. 
+ + AIO enables even a single application thread to overlap I/O operations + with other processing, by providing an interface for submitting one or + more I/O requests in one system call (io_submit()) without waiting for + completion, and a separate interface (io_getevents()) to reap completed + I/O operations associated with a given completion group. + """ + + homepage = "http://lse.sourceforge.net/io/aio.html" + url = ( + "https://debian.inf.tu-dresden.de/debian/pool/main/liba/libaio/libaio_0.3.110.orig.tar.gz" + ) + + version("0.3.113", sha256="2c44d1c5fd0d43752287c9ae1eb9c023f04ef848ea8d4aafa46e9aedb678200b") + version("0.3.110", sha256="e019028e631725729376250e32b473012f7cb68e1f7275bfc1bbcdd0f8745f7e") + + conflicts("platform=darwin", msg="libaio is a linux specific library") + + @property + def install_targets(self): + return ["prefix={0}".format(self.spec.prefix), "install"] + + def set_include(self, env, path): + env.append_flags('CFLAGS', '-I{}'.format(path)) + env.append_flags('CXXFLAGS', '-I{}'.format(path)) + env.prepend_path('INCLUDE', '{}'.format(path)) + env.prepend_path('CPATH', '{}'.format(path)) + + def set_lib(self, env, path): + env.prepend_path('LIBRARY_PATH', path) + env.prepend_path('LD_LIBRARY_PATH', path) + env.append_flags('LDFLAGS', '-L{}'.format(path)) + env.prepend_path('PYTHONPATH', '{}'.format(path)) + + def set_flags(self, env): + self.set_include(env, '{}/include'.format(self.prefix)) + self.set_include(env, '{}/include'.format(self.prefix)) + self.set_lib(env, '{}/lib'.format(self.prefix)) + self.set_lib(env, '{}/lib64'.format(self.prefix)) + env.prepend_path('CMAKE_PREFIX_PATH', '{}/cmake'.format(self.prefix)) + + def setup_dependent_environment(self, spack_env, run_env, dependent_spec): + self.set_flags(spack_env) + + def setup_run_environment(self, env): + self.set_flags(env) From 9d658872e5a48970600658e0d2835308ec67408f Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Wed, 8 Nov 2023 12:08:51 -0600 Subject: [PATCH 050/114] Add cmake else for aio --- CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a73181b54..d25fdf0e8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -91,7 +91,8 @@ find_library(LIBAIO_LIBRARY NAMES aio) if(LIBAIO_LIBRARY) message(STATUS "found libaio at ${LIBAIO_LIBRARY}") else() - message(FATAL_ERROR "Could not find libaio, please install libaio-dev.") + set(LIBAIO_LIBRARY aio) + message(STATUS "Assuming it was installed with our aio spack") endif() # Zeromq From 70c68350350ee1dc3110e5b2b4a7792b39ea407b Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Wed, 8 Nov 2023 12:16:43 -0600 Subject: [PATCH 051/114] Remove excessive prints --- hrun/include/hrun/api/hrun_client.h | 20 +++++++++---------- .../hermes_blob_mdm/hermes_blob_mdm_tasks.h | 4 ++-- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/hrun/include/hrun/api/hrun_client.h b/hrun/include/hrun/api/hrun_client.h index ca5613673..299f4389e 100644 --- a/hrun/include/hrun/api/hrun_client.h +++ b/hrun/include/hrun/api/hrun_client.h @@ -274,10 +274,10 @@ class Client : public ConfigurationManager { try { if constexpr(IN_CLIENT) { auto id = data_alloc_->GetId(); - HILOG(kInfo, "{} Alloc {}/{} Heap size: {}", THREAD_MODEL, - id.bits_.major_, - id.bits_.minor_, - data_alloc_->GetCurrentlyAllocatedSize()); +// HILOG(kInfo, "{} Alloc {}/{} Heap size: {}", THREAD_MODEL, +// id.bits_.major_, +// id.bits_.minor_, +// data_alloc_->GetCurrentlyAllocatedSize()); p = 
data_alloc_->AllocateLocalPtr(size); } else { p = main_alloc_->AllocateLocalPtr(size); @@ -298,10 +298,10 @@ class Client : public ConfigurationManager { HSHM_ALWAYS_INLINE void FreeBuffer(hipc::Pointer &p) { auto alloc = HERMES_MEMORY_MANAGER->GetAllocator(p.allocator_id_); - HILOG(kInfo, "Heap size for {}/{}: {}", - p.allocator_id_.bits_.major_, - p.allocator_id_.bits_.minor_, - data_alloc_->GetCurrentlyAllocatedSize()); +// HILOG(kInfo, "Heap size for {}/{}: {}", +// p.allocator_id_.bits_.major_, +// p.allocator_id_.bits_.minor_, +// data_alloc_->GetCurrentlyAllocatedSize()); alloc->Free(p); } @@ -309,9 +309,9 @@ class Client : public ConfigurationManager { HSHM_ALWAYS_INLINE void FreeBuffer(LPointer &p) { auto alloc = HERMES_MEMORY_MANAGER->GetAllocator(p.shm_.allocator_id_); - HILOG(kInfo, "Heap size: {}", data_alloc_->GetCurrentlyAllocatedSize()); +// HILOG(kInfo, "Heap size: {}", data_alloc_->GetCurrentlyAllocatedSize()); alloc->FreeLocalPtr(p); - HILOG(kInfo, "Heap size after: {}", data_alloc_->GetCurrentlyAllocatedSize()); +// HILOG(kInfo, "Heap size after: {}", data_alloc_->GetCurrentlyAllocatedSize()); } /** Convert pointer to char* */ diff --git a/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h b/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h index 0cbf0dab4..29ac920f2 100644 --- a/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h +++ b/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h @@ -289,14 +289,14 @@ struct PutBlobTask : public Task, TaskFlags data_ = data; score_ = score; flags_ = bitfield32_t(flags | ctx.flags_.bits_); - HILOG(kInfo, "Creating PUT {} of size {}", task_node_, data_size_); + // HILOG(kInfo, "Creating PUT {} of size {}", task_node_, data_size_); } /** Destructor */ ~PutBlobTask() { HSHM_DESTROY_AR(blob_name_); if (IsDataOwner()) { - HILOG(kInfo, "Actually freeing PUT {} of size {}", task_node_, data_size_); + // HILOG(kInfo, "Actually freeing PUT {} of size {}", task_node_, data_size_); HRUN_CLIENT->FreeBuffer(data_); } } From e813f132ca410f4d65f103b9bf03a9770626fffb Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Wed, 8 Nov 2023 12:19:03 -0600 Subject: [PATCH 052/114] Make lane hash for reads equal to disk_off --- tasks/bdev/include/bdev/bdev_tasks.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tasks/bdev/include/bdev/bdev_tasks.h b/tasks/bdev/include/bdev/bdev_tasks.h index a96c54810..94cc2003a 100644 --- a/tasks/bdev/include/bdev/bdev_tasks.h +++ b/tasks/bdev/include/bdev/bdev_tasks.h @@ -183,7 +183,7 @@ struct WriteTask : public Task, TaskFlags { size_t size) : Task(alloc) { // Initialize task task_node_ = task_node; - lane_hash_ = 0; + lane_hash_ = disk_off; prio_ = TaskPrio::kLowLatency; task_state_ = state_id; method_ = Method::kWrite; @@ -228,7 +228,7 @@ struct ReadTask : public Task, TaskFlags { size_t size) : Task(alloc) { // Initialize task task_node_ = task_node; - lane_hash_ = 0; + lane_hash_ = disk_off; prio_ = TaskPrio::kLowLatency; task_state_ = state_id; method_ = Method::kRead; From 527d30219ff3f07483ca74422c38df13692a19e5 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Wed, 8 Nov 2023 12:29:03 -0600 Subject: [PATCH 053/114] Switch back to posix temporarily --- tasks/posix_bdev/src/posix_bdev.cc | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/tasks/posix_bdev/src/posix_bdev.cc b/tasks/posix_bdev/src/posix_bdev.cc index 5eb11533c..debd2e70a 100644 --- 
a/tasks/posix_bdev/src/posix_bdev.cc +++ b/tasks/posix_bdev/src/posix_bdev.cc @@ -75,6 +75,7 @@ class Server : public TaskLib, public bdev::Server { /** Write to bdev */ void Write(WriteTask *task, RunContext &rctx) { HILOG(kDebug, "Writing {} bytes to {}", task->size_, path_); +#ifdef HERMES_LIBAIO switch (task->phase_) { case 0: { int ret = io_setup(1, &task->ctx_); @@ -113,11 +114,13 @@ class Server : public TaskLib, public bdev::Server { io_destroy(task->ctx_); } } -// ssize_t count = pwrite(fd_, task->buf_, task->size_, (off_t)task->disk_off_); -// if (count != task->size_) { -// HELOG(kError, "BORG: wrote {} bytes, but expected {}: {}", -// count, task->size_, strerror(errno)); -// } +#else + ssize_t count = pwrite(fd_, task->buf_, task->size_, (off_t)task->disk_off_); + if (count != task->size_) { + HELOG(kError, "BORG: wrote {} bytes, but expected {}: {}", + count, task->size_, strerror(errno)); + } +#endif task->SetModuleComplete(); } void MonitorWrite(u32 mode, WriteTask *task, RunContext &rctx) { @@ -126,6 +129,7 @@ class Server : public TaskLib, public bdev::Server { /** Read from bdev */ void Read(ReadTask *task, RunContext &rctx) { HILOG(kDebug, "Reading {} bytes from {}", task->size_, path_); +#ifdef HERMES_LIBAIO switch (task->phase_) { case 0: { int ret = io_setup(1, &task->ctx_); @@ -164,11 +168,13 @@ class Server : public TaskLib, public bdev::Server { io_destroy(task->ctx_); } } -// ssize_t count = pread(fd_, task->buf_, task->size_, (off_t)task->disk_off_); -// if (count != task->size_) { -// HELOG(kError, "BORG: read {} bytes, but expected {}", -// count, task->size_); -// } +#else + ssize_t count = pread(fd_, task->buf_, task->size_, (off_t)task->disk_off_); + if (count != task->size_) { + HELOG(kError, "BORG: read {} bytes, but expected {}", + count, task->size_); + } +#endif task->SetModuleComplete(); } void MonitorRead(u32 mode, ReadTask *task, RunContext &rctx) { From 02babb8b71b28ebee5a2a9516c0b0c0cc5d0d579 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Wed, 8 Nov 2023 12:41:22 -0600 Subject: [PATCH 054/114] Make it so low-latency queues are on different workers than I/O queues --- .../hrun/queue_manager/queues/hshm_queue.h | 6 ++++++ .../src/worch_queue_round_robin.cc | 15 +++++++++++++-- tasks/bdev/include/bdev/bdev.h | 2 +- 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/hrun/include/hrun/queue_manager/queues/hshm_queue.h b/hrun/include/hrun/queue_manager/queues/hshm_queue.h index 94e921527..85a85aeb3 100644 --- a/hrun/include/hrun/queue_manager/queues/hshm_queue.h +++ b/hrun/include/hrun/queue_manager/queues/hshm_queue.h @@ -75,6 +75,12 @@ struct LaneGroup : public PriorityInfo { return flags_.Any(QUEUE_LONG_RUNNING) || prio_ == 0; } + /** Check if this group is long-running or ADMIN */ + HSHM_ALWAYS_INLINE + bool IsLowLatency() { + return flags_.Any(QUEUE_LOW_LATENCY); + } + /** Get lane */ Lane& GetLane(u32 lane_id) { return (*lanes_)[lane_id]; diff --git a/hrun/tasks_required/worch_queue_round_robin/src/worch_queue_round_robin.cc b/hrun/tasks_required/worch_queue_round_robin/src/worch_queue_round_robin.cc index a208fde93..d6a38fe1e 100644 --- a/hrun/tasks_required/worch_queue_round_robin/src/worch_queue_round_robin.cc +++ b/hrun/tasks_required/worch_queue_round_robin/src/worch_queue_round_robin.cc @@ -44,7 +44,7 @@ class Server : public TaskLib { continue; } for (LaneGroup &lane_group : *queue.groups_) { - // NOTE(llogan): Assumes a minimum of two workers, admin on worker 0. 
+ // NOTE(llogan): Assumes a minimum of three workers, admin on worker 0. if (lane_group.IsLowPriority()) { for (u32 lane_id = lane_group.num_scheduled_; lane_id < lane_group.num_lanes_; ++lane_id) { // HILOG(kDebug, "Scheduling the queue {} (lane {})", queue.id_, lane_id); @@ -53,9 +53,20 @@ class Server : public TaskLib { } lane_group.num_scheduled_ = lane_group.num_lanes_; } else { + u32 rem_workers = HRUN_WORK_ORCHESTRATOR->workers_.size() - 1; + u32 off_lowlat = 1; + u32 count_lowlat = rem_workers / 2; + rem_workers -= count_lowlat; + u32 off_highlat = off_lowlat + count_lowlat; + u32 count_highlat = rem_workers; for (u32 lane_id = lane_group.num_scheduled_; lane_id < lane_group.num_lanes_; ++lane_id) { // HILOG(kDebug, "Scheduling the queue {} (lane {})", queue.id_, lane_id); - u32 worker_id = (count_ % (HRUN_WORK_ORCHESTRATOR->workers_.size() - 1)) + 1; + u32 worker_id; + if (lane_group.IsLowLatency()) { + worker_id = (count_ % count_lowlat) + off_lowlat; + } else { + worker_id = (count_ % count_highlat) + off_highlat; + } Worker &worker = *HRUN_WORK_ORCHESTRATOR->workers_[worker_id]; worker.PollQueues({WorkEntry(lane_group.prio_, lane_id, &queue)}); count_ += 1; diff --git a/tasks/bdev/include/bdev/bdev.h b/tasks/bdev/include/bdev/bdev.h index 44bf27258..873912406 100644 --- a/tasks/bdev/include/bdev/bdev.h +++ b/tasks/bdev/include/bdev/bdev.h @@ -52,7 +52,7 @@ class Client : public TaskLibClient { std::vector queue_info = { {1, 1, qm.queue_depth_, 0}, {1, 1, qm.queue_depth_, QUEUE_LONG_RUNNING}, - {4, 4, qm.queue_depth_, QUEUE_LOW_LATENCY} + {qm.max_lanes_, qm.max_lanes_, qm.queue_depth_, 0} }; return HRUN_ADMIN->AsyncCreateTaskState( task_node, domain_id, state_name, lib_name, id_, From e3f089134fb76f0ba0581d5e0d457c81264e133b Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Wed, 8 Nov 2023 12:52:11 -0600 Subject: [PATCH 055/114] Print heap size --- hrun/include/hrun/api/hrun_client.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hrun/include/hrun/api/hrun_client.h b/hrun/include/hrun/api/hrun_client.h index 299f4389e..e6bf9e386 100644 --- a/hrun/include/hrun/api/hrun_client.h +++ b/hrun/include/hrun/api/hrun_client.h @@ -311,7 +311,7 @@ class Client : public ConfigurationManager { auto alloc = HERMES_MEMORY_MANAGER->GetAllocator(p.shm_.allocator_id_); // HILOG(kInfo, "Heap size: {}", data_alloc_->GetCurrentlyAllocatedSize()); alloc->FreeLocalPtr(p); -// HILOG(kInfo, "Heap size after: {}", data_alloc_->GetCurrentlyAllocatedSize()); + HILOG(kInfo, "Heap size after: {}", data_alloc_->GetCurrentlyAllocatedSize()); } /** Convert pointer to char* */ From 84ce7c5f58c8e33eea9b177e76a04b151a58b5e3 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Wed, 8 Nov 2023 12:56:00 -0600 Subject: [PATCH 056/114] Print heap size --- hrun/include/hrun/api/hrun_client.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hrun/include/hrun/api/hrun_client.h b/hrun/include/hrun/api/hrun_client.h index e6bf9e386..18e63e864 100644 --- a/hrun/include/hrun/api/hrun_client.h +++ b/hrun/include/hrun/api/hrun_client.h @@ -298,10 +298,10 @@ class Client : public ConfigurationManager { HSHM_ALWAYS_INLINE void FreeBuffer(hipc::Pointer &p) { auto alloc = HERMES_MEMORY_MANAGER->GetAllocator(p.allocator_id_); -// HILOG(kInfo, "Heap size for {}/{}: {}", -// p.allocator_id_.bits_.major_, -// p.allocator_id_.bits_.minor_, -// data_alloc_->GetCurrentlyAllocatedSize()); + HILOG(kInfo, "Heap size for {}/{}: {}", + p.allocator_id_.bits_.major_, + 
p.allocator_id_.bits_.minor_, + data_alloc_->GetCurrentlyAllocatedSize()); alloc->Free(p); } From 500c67ca3e91fa556f1a0b94e460e5cdc06833ae Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Wed, 8 Nov 2023 18:06:28 -0600 Subject: [PATCH 057/114] Prining heap size again --- hrun/include/hrun/api/hrun_client.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/hrun/include/hrun/api/hrun_client.h b/hrun/include/hrun/api/hrun_client.h index 18e63e864..075506fd5 100644 --- a/hrun/include/hrun/api/hrun_client.h +++ b/hrun/include/hrun/api/hrun_client.h @@ -298,20 +298,22 @@ class Client : public ConfigurationManager { HSHM_ALWAYS_INLINE void FreeBuffer(hipc::Pointer &p) { auto alloc = HERMES_MEMORY_MANAGER->GetAllocator(p.allocator_id_); - HILOG(kInfo, "Heap size for {}/{}: {}", + alloc->Free(p); + HILOG(kInfo, "Heap size (1) for {}/{}: {}", p.allocator_id_.bits_.major_, p.allocator_id_.bits_.minor_, data_alloc_->GetCurrentlyAllocatedSize()); - alloc->Free(p); } /** Free a buffer */ HSHM_ALWAYS_INLINE void FreeBuffer(LPointer &p) { auto alloc = HERMES_MEMORY_MANAGER->GetAllocator(p.shm_.allocator_id_); -// HILOG(kInfo, "Heap size: {}", data_alloc_->GetCurrentlyAllocatedSize()); alloc->FreeLocalPtr(p); - HILOG(kInfo, "Heap size after: {}", data_alloc_->GetCurrentlyAllocatedSize()); + HILOG(kInfo, "Heap size (2) for {}/{}: {}", + alloc->GetId().bits_.major_, + alloc->GetId().bits_.minor_, + data_alloc_->GetCurrentlyAllocatedSize()); } /** Convert pointer to char* */ From f4ae91be4fd5443cc4c41dc87b8f7743808b86d2 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Wed, 8 Nov 2023 18:42:17 -0600 Subject: [PATCH 058/114] Only use data alloc for now --- hrun/include/hrun/api/hrun_client.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hrun/include/hrun/api/hrun_client.h b/hrun/include/hrun/api/hrun_client.h index 075506fd5..0eb3f6d5a 100644 --- a/hrun/include/hrun/api/hrun_client.h +++ b/hrun/include/hrun/api/hrun_client.h @@ -250,7 +250,7 @@ class Client : public ConfigurationManager { if constexpr(IN_CLIENT) { p = data_alloc_->AllocateLocalPtr(size); } else { - p = main_alloc_->AllocateLocalPtr(size); + p = data_alloc_->AllocateLocalPtr(size); } } catch (...) 
{ p.shm_.SetNull(); @@ -280,7 +280,7 @@ class Client : public ConfigurationManager { // data_alloc_->GetCurrentlyAllocatedSize()); p = data_alloc_->AllocateLocalPtr(size); } else { - p = main_alloc_->AllocateLocalPtr(size); + p = data_alloc_->AllocateLocalPtr(size); } } catch (hshm::Error &e) { p.shm_.SetNull(); From 275a6a7e66e8fa18df59caccad6eb27446485e8a Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Thu, 9 Nov 2023 04:26:33 -0600 Subject: [PATCH 059/114] Make separate server and client allocators for buffers --- hrun/include/hrun/api/hrun_client.h | 49 +++---------------- .../include/hrun_admin/hrun_admin.h | 2 +- .../remote_queue/src/remote_queue.cc | 2 +- include/hermes/bucket.h | 9 ++-- .../data_stager/factory/binary_stager.h | 2 +- tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc | 4 +- tasks/hermes_data_op/src/hermes_data_op.cc | 4 +- 7 files changed, 16 insertions(+), 56 deletions(-) diff --git a/hrun/include/hrun/api/hrun_client.h b/hrun/include/hrun/api/hrun_client.h index 0eb3f6d5a..8416246b1 100644 --- a/hrun/include/hrun/api/hrun_client.h +++ b/hrun/include/hrun/api/hrun_client.h @@ -240,57 +240,20 @@ class Client : public ConfigurationManager { } /** Allocate a buffer in a task */ - template + template HSHM_ALWAYS_INLINE - LPointer AllocateBuffer(size_t size, Task *yield_task) { + LPointer AllocateBufferServer(size_t size, Task *yield_task) { LPointer p; - // HILOG(kInfo, "Heap size: {}", data_alloc_->GetCurrentlyAllocatedSize()); - while (true) { - try { - if constexpr(IN_CLIENT) { - p = data_alloc_->AllocateLocalPtr(size); - } else { - p = data_alloc_->AllocateLocalPtr(size); - } - } catch (...) { - p.shm_.SetNull(); - } - if (!p.shm_.IsNull()) { - break; - } - // HILOG(kInfo, "{} Could not allocate buffer of size {} (2)?", THREAD_MODEL, size); - Yield(yield_task); - } + p = main_alloc_->AllocateLocalPtr(size); return p; } /** Allocate a buffer */ - template + template HSHM_ALWAYS_INLINE - LPointer AllocateBuffer(size_t size) { - // + LPointer AllocateBufferServer(size_t size) { LPointer p; - while (true) { - try { - if constexpr(IN_CLIENT) { - auto id = data_alloc_->GetId(); -// HILOG(kInfo, "{} Alloc {}/{} Heap size: {}", THREAD_MODEL, -// id.bits_.major_, -// id.bits_.minor_, -// data_alloc_->GetCurrentlyAllocatedSize()); - p = data_alloc_->AllocateLocalPtr(size); - } else { - p = data_alloc_->AllocateLocalPtr(size); - } - } catch (hshm::Error &e) { - p.shm_.SetNull(); - } - if (!p.shm_.IsNull()) { - break; - } - Yield(); - // HILOG(kInfo, "{} Could not allocate buffer of size {} (1)?", THREAD_MODEL, size); - } + p = main_alloc_->AllocateLocalPtr(size); return p; } diff --git a/hrun/tasks_required/hrun_admin/include/hrun_admin/hrun_admin.h b/hrun/tasks_required/hrun_admin/include/hrun_admin/hrun_admin.h index 4aa3e1797..40fc424c9 100644 --- a/hrun/tasks_required/hrun_admin/include/hrun_admin/hrun_admin.h +++ b/hrun/tasks_required/hrun_admin/include/hrun_admin/hrun_admin.h @@ -229,7 +229,7 @@ class Client : public TaskLibClient { /** Allocate a buffer */ HSHM_ALWAYS_INLINE - LPointer AllocateBuffer(size_t size) { + LPointer AllocateBufferClient(size_t size) { LPointer p; while (true) { try { diff --git a/hrun/tasks_required/remote_queue/src/remote_queue.cc b/hrun/tasks_required/remote_queue/src/remote_queue.cc index 3bfc5ac24..0c2b915a0 100644 --- a/hrun/tasks_required/remote_queue/src/remote_queue.cc +++ b/hrun/tasks_required/remote_queue/src/remote_queue.cc @@ -379,7 +379,7 @@ class Server : public TaskLib { size_t data_size, IoType io_type) { LPointer data = - 
HRUN_CLIENT->AllocateBuffer(data_size); + HRUN_CLIENT->AllocateBufferServer(data_size); // Create the input data transfer object std::vector xfer(2); diff --git a/include/hermes/bucket.h b/include/hermes/bucket.h index 13a893535..10dbf7103 100644 --- a/include/hermes/bucket.h +++ b/include/hermes/bucket.h @@ -226,8 +226,7 @@ class Bucket { bitfield32_t flags, task_flags( TASK_FIRE_AND_FORGET | TASK_DATA_OWNER | TASK_LOW_LATENCY); // Copy data to shared memory - // LPointer p = HRUN_CLIENT->AllocateBuffer(blob.size()); - LPointer p = HRUN_ADMIN->AllocateBuffer(blob.size()); + LPointer p = HRUN_ADMIN->AllocateBufferClient(blob.size()); char *data = p.ptr_; memcpy(data, blob.data(), blob.size()); // Put to shared memory @@ -379,8 +378,7 @@ class Bucket { * Append \a blob_name Blob into the bucket (fully asynchronous) * */ void Append(const Blob &blob, size_t page_size, Context &ctx) { - // LPointer p = HRUN_CLIENT->AllocateBuffer(blob.size()); - LPointer p = HRUN_ADMIN->AllocateBuffer(blob.size()); + LPointer p = HRUN_ADMIN->AllocateBufferClient(blob.size()); char *data = p.ptr_; memcpy(data, blob.data(), blob.size()); bkt_mdm_->AppendBlobRoot( @@ -447,8 +445,7 @@ class Bucket { } // Get from shared memory size_t data_size = blob.size(); - // LPointer data_p = HRUN_CLIENT->AllocateBuffer(data_size); - LPointer data_p = HRUN_ADMIN->AllocateBuffer(blob.size()); + LPointer data_p = HRUN_ADMIN->AllocateBufferClient(blob.size()); LPointer> push_task; push_task = blob_mdm_->AsyncGetBlobRoot(id_, hshm::to_charbuf(blob_name), blob_id, blob_off, diff --git a/tasks/data_stager/include/data_stager/factory/binary_stager.h b/tasks/data_stager/include/data_stager/factory/binary_stager.h index 5e6f2c7ba..1e3a3bedb 100644 --- a/tasks/data_stager/include/data_stager/factory/binary_stager.h +++ b/tasks/data_stager/include/data_stager/factory/binary_stager.h @@ -54,7 +54,7 @@ class BinaryFileStager : public AbstractStager { plcmnt.DecodeBlobName(*task->blob_name_, page_size_); HILOG(kDebug, "Attempting to stage {} bytes from the backend file {} at offset {}", page_size_, url_, plcmnt.bucket_off_); - LPointer blob = HRUN_CLIENT->AllocateBuffer(page_size_); + LPointer blob = HRUN_CLIENT->AllocateBufferServer(page_size_); fd_ = HERMES_POSIX_API->open(path_.c_str(), O_CREAT | O_RDWR, 0666); if (fd_ < 0) { HELOG(kError, "Failed to open file {}", path_); diff --git a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc index b9baebe6c..16156a21a 100644 --- a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc +++ b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc @@ -282,7 +282,7 @@ class Server : public TaskLib { mod_count > blob_info.last_flush_) { HILOG(kDebug, "Flushing blob {} (mod_count={}, last_flush={})", blob_info.blob_id_, blob_info.mod_count_, blob_info.last_flush_); - LPointer data = HRUN_CLIENT->AllocateBuffer( + LPointer data = HRUN_CLIENT->AllocateBufferServer( blob_info.blob_size_, task); LPointer get_blob = blob_mdm_.AsyncGetBlob(task->task_node_ + 1, @@ -849,7 +849,7 @@ class Server : public TaskLib { task->SetModuleComplete(); return; } - task->data_ = HRUN_CLIENT->AllocateBuffer( + task->data_ = HRUN_CLIENT->AllocateBufferServer( blob_info.blob_size_, task).shm_; task->data_size_ = blob_info.blob_size_; task->get_task_ = blob_mdm_.AsyncGetBlob(task->task_node_ + 1, diff --git a/tasks/hermes_data_op/src/hermes_data_op.cc b/tasks/hermes_data_op/src/hermes_data_op.cc index f46e02fb9..09a20d6ff 100644 --- a/tasks/hermes_data_op/src/hermes_data_op.cc +++ 
b/tasks/hermes_data_op/src/hermes_data_op.cc @@ -183,7 +183,7 @@ class Server : public TaskLib { for (OpData &data : op_data) { // Get the input data LPointer data_ptr = - HRUN_CLIENT->AllocateBuffer(data.size_, task); + HRUN_CLIENT->AllocateBufferServer(data.size_, task); LPointer in_task = blob_mdm_.AsyncGetBlob(task->task_node_ + 1, data.bkt_id_, @@ -203,7 +203,7 @@ class Server : public TaskLib { // Calaculate the minimum LPointer min_lptr = - HRUN_CLIENT->AllocateBuffer(sizeof(float), task); + HRUN_CLIENT->AllocateBufferServer(sizeof(float), task); float *min_ptr = (float*)min_lptr.ptr_; *min_ptr = std::numeric_limits::max(); for (size_t i = 0; i < in_task->data_size_; i += sizeof(float)) { From 9890376ddebf46b8c32b218af06cdb7ac8b9f1e8 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Thu, 9 Nov 2023 04:51:31 -0600 Subject: [PATCH 060/114] Don't re-schedule workers --- .../include/hrun/work_orchestrator/affinity.h | 131 ++++++++++++++++++ hrun/src/work_orchestrator.cc | 2 + .../src/worch_proc_round_robin.cc | 12 +- .../src/worch_queue_round_robin.cc | 20 +-- 4 files changed, 152 insertions(+), 13 deletions(-) diff --git a/hrun/include/hrun/work_orchestrator/affinity.h b/hrun/include/hrun/work_orchestrator/affinity.h index 55f54fc1f..0dbc32ab7 100644 --- a/hrun/include/hrun/work_orchestrator/affinity.h +++ b/hrun/include/hrun/work_orchestrator/affinity.h @@ -14,6 +14,11 @@ #define HRUN_INCLUDE_HRUN_WORK_ORCHESTRATOR_AFFINITY_H_ #include +#include +#include +#include +#include +#include class ProcessAffiner { public: @@ -30,6 +35,132 @@ class ProcessAffiner { // HELOG(kError, "Failed to set CPU affinity for process {}", pid); } } + + private: + int n_cpu_; + cpu_set_t *cpus_; + + public: + ProcessAffiner() { + n_cpu_ = get_nprocs_conf(); + cpus_ = new cpu_set_t[n_cpu_]; + CPU_ZERO(cpus_); + } + + ~ProcessAffiner() { + delete cpus_; + } + + inline bool isdigit(char digit) { + return ('0' <= digit && digit <= '9'); + } + + inline int GetNumCPU() { + return n_cpu_; + } + + inline void SetCpu(int cpu) { + CPU_SET(cpu, cpus_); + } + + inline void SetCpus(int off, int len) { + for (int i = 0; i < len; ++i) { + SetCpu(off + i); + } + } + + inline void ClearCpu(int cpu) { + CPU_CLR(cpu, cpus_); + } + + inline void ClearCpus(int off, int len) { + for (int i = 0; i < len; ++i) { + ClearCpu(off + i); + } + } + + inline void Clear() { + CPU_ZERO(cpus_); + } + + int AffineAll(void) { + DIR *procdir; + struct dirent *entry; + size_t count = 0; + + // Open /proc directory. + procdir = opendir("/proc"); + if (!procdir) { + perror("opendir failed"); + return 0; + } + + // Iterate through all files and folders of /proc. + while ((entry = readdir(procdir))) { + // Skip anything that is not a PID folder. 
+ if (!is_pid_folder(entry)) + continue; + // Get the PID of the running process + int proc_pid = atoi(entry->d_name); + // Set the affinity of all running process to this mask + count += Affine(proc_pid); + } + closedir(procdir); + return count; + } + int Affine(std::vector &&pids) { + return Affine(pids); + } + int Affine(std::vector &pids) { + // Set the affinity of all running process to this mask + size_t count = 0; + for (pid_t &pid : pids) { + count += Affine(pid); + } + return count; + } + int Affine(int pid) { + return SetAffinitySafe(pid, n_cpu_, cpus_); + } + + void PrintAffinity(int pid) { + PrintAffinity("", pid); + } + void PrintAffinity(std::string prefix, int pid) { + std::vector cpus(n_cpu_); + sched_getaffinity(pid, n_cpu_, cpus.data()); + PrintAffinity(prefix, pid, cpus.data()); + } + + void PrintAffinity(std::string prefix, int pid, cpu_set_t *cpus) { + std::string affinity = ""; + for (int i = 0; i < n_cpu_; ++i) { + if (CPU_ISSET(i, cpus)) { + affinity += std::to_string(i) + ", "; + } + } + printf("%s: CPU affinity[pid=%d]: %s\n", prefix.c_str(), + pid, affinity.c_str()); + } + + private: + int SetAffinitySafe(int pid, int n_cpu, cpu_set_t *cpus) { + int ret = sched_setaffinity(pid, n_cpu, cpus); + if (ret == -1) { + return 0; + } + return 1; + } + + // Helper function to check if a struct dirent from /proc is a PID folder. + int is_pid_folder(const struct dirent *entry) { + const char *p; + for (p = entry->d_name; *p; p++) { + if (!isdigit(*p)) + return false; + } + return true; + } }; #endif // HRUN_INCLUDE_HRUN_WORK_ORCHESTRATOR_AFFINITY_H_ diff --git a/hrun/src/work_orchestrator.cc b/hrun/src/work_orchestrator.cc index 285d75d1f..ec9294f6a 100644 --- a/hrun/src/work_orchestrator.cc +++ b/hrun/src/work_orchestrator.cc @@ -32,6 +32,8 @@ void WorkOrchestrator::ServerInit(ServerConfig *config, QueueManager &qm) { workers_.reserve(num_workers); for (u32 worker_id = 0; worker_id < num_workers; ++worker_id) { workers_.emplace_back(std::make_unique(worker_id, xstream_)); + Worker &worker = *workers_.back(); + worker.SetCpuAffinity(worker_id % HERMES_SYSTEM_INFO->ncpu_); } stop_runtime_ = false; kill_requested_ = false; diff --git a/hrun/tasks_required/worch_proc_round_robin/src/worch_proc_round_robin.cc b/hrun/tasks_required/worch_proc_round_robin/src/worch_proc_round_robin.cc index 2964ded4b..457c1a89e 100644 --- a/hrun/tasks_required/worch_proc_round_robin/src/worch_proc_round_robin.cc +++ b/hrun/tasks_required/worch_proc_round_robin/src/worch_proc_round_robin.cc @@ -34,11 +34,13 @@ class Server : public TaskLib { /** Schedule running processes */ void Schedule(ScheduleTask *task, RunContext &rctx) { - int rr = 0; - for (std::unique_ptr &worker : HRUN_WORK_ORCHESTRATOR->workers_) { - worker->SetCpuAffinity(rr % HERMES_SYSTEM_INFO->ncpu_); - ++rr; - } +// int rr = 0; +// ProcessAffiner affiner; +// affiner.AffineAll(); +// for (std::unique_ptr &worker : HRUN_WORK_ORCHESTRATOR->workers_) { +// worker->SetCpuAffinity(rr % HERMES_SYSTEM_INFO->ncpu_); +// ++rr; +// } } void MonitorSchedule(u32 mode, ScheduleTask *task, RunContext &rctx) { } diff --git a/hrun/tasks_required/worch_queue_round_robin/src/worch_queue_round_robin.cc b/hrun/tasks_required/worch_queue_round_robin/src/worch_queue_round_robin.cc index d6a38fe1e..6296095c0 100644 --- a/hrun/tasks_required/worch_queue_round_robin/src/worch_queue_round_robin.cc +++ b/hrun/tasks_required/worch_queue_round_robin/src/worch_queue_round_robin.cc @@ -18,12 +18,14 @@ namespace hrun::worch_queue_round_robin { class Server : 
public TaskLib { public: - u32 count_; + u32 count_lowlat_; + u32 count_highlat_; public: /** Construct work orchestrator queue scheduler */ void Construct(ConstructTask *task, RunContext &rctx) { - count_ = 0; + count_lowlat_ = 0; + count_highlat_ = 0; task->SetModuleComplete(); } void MonitorConstruct(u32 mode, ConstructTask *task, RunContext &rctx) { @@ -61,15 +63,17 @@ class Server : public TaskLib { u32 count_highlat = rem_workers; for (u32 lane_id = lane_group.num_scheduled_; lane_id < lane_group.num_lanes_; ++lane_id) { // HILOG(kDebug, "Scheduling the queue {} (lane {})", queue.id_, lane_id); - u32 worker_id; if (lane_group.IsLowLatency()) { - worker_id = (count_ % count_lowlat) + off_lowlat; + u32 worker_id = (count_lowlat_ % count_lowlat) + off_lowlat; + count_lowlat_ += 1; + Worker &worker = *HRUN_WORK_ORCHESTRATOR->workers_[worker_id]; + worker.PollQueues({WorkEntry(lane_group.prio_, lane_id, &queue)}); } else { - worker_id = (count_ % count_highlat) + off_highlat; + u32 worker_id = (count_highlat_ % count_highlat) + off_highlat; + count_highlat_ += 1; + Worker &worker = *HRUN_WORK_ORCHESTRATOR->workers_[worker_id]; + worker.PollQueues({WorkEntry(lane_group.prio_, lane_id, &queue)}); } - Worker &worker = *HRUN_WORK_ORCHESTRATOR->workers_[worker_id]; - worker.PollQueues({WorkEntry(lane_group.prio_, lane_id, &queue)}); - count_ += 1; } lane_group.num_scheduled_ = lane_group.num_lanes_; } From 8074969da647339e0bffe097cb15c2309549cc4a Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Thu, 9 Nov 2023 04:59:37 -0600 Subject: [PATCH 061/114] Use prio of dup task and remote task --- .../remote_queue/include/remote_queue/remote_queue.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hrun/tasks_required/remote_queue/include/remote_queue/remote_queue.h b/hrun/tasks_required/remote_queue/include/remote_queue/remote_queue.h index 99444864c..5ca9526e5 100644 --- a/hrun/tasks_required/remote_queue/include/remote_queue/remote_queue.h +++ b/hrun/tasks_required/remote_queue/include/remote_queue/remote_queue.h @@ -78,7 +78,7 @@ class Client : public TaskLibClient { orig_task->task_node_ + 1, DomainId::GetLocal(), id_, domain_ids, orig_task, exec, orig_task->method_, xfer); MultiQueue *queue = HRUN_CLIENT->GetQueue(queue_id_); - queue->Emplace(TaskPrio::kLowLatency, orig_task->lane_hash_, push_task.shm_); + queue->Emplace(push_task->prio_, orig_task->lane_hash_, push_task.shm_); HILOG(kDebug, "Did dispersion for (task_node={}, task_state={}, method={})", orig_task->task_node_ + 1, orig_task->task_state_, orig_task->method_) } @@ -106,7 +106,7 @@ class Client : public TaskLibClient { orig_task->task_node_ + 1, id_, orig_task, exec, orig_task->method_, dups); MultiQueue *queue = HRUN_CLIENT->GetQueue(queue_id_); - queue->Emplace(TaskPrio::kLowLatency, orig_task->lane_hash_, dup_task.shm_); + queue->Emplace(dup_task->prio_, orig_task->lane_hash_, dup_task.shm_); } /** Spawn task to accept new connections */ From 67fea9f0a6813c1516c20ebbf27fddd9d52d69d5 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Thu, 9 Nov 2023 05:02:12 -0600 Subject: [PATCH 062/114] Use 4x qdepth for remote queue --- .../remote_queue/include/remote_queue/remote_queue.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hrun/tasks_required/remote_queue/include/remote_queue/remote_queue.h b/hrun/tasks_required/remote_queue/include/remote_queue/remote_queue.h index 5ca9526e5..2f557f277 100644 --- a/hrun/tasks_required/remote_queue/include/remote_queue/remote_queue.h +++ 
b/hrun/tasks_required/remote_queue/include/remote_queue/remote_queue.h @@ -34,9 +34,9 @@ class Client : public TaskLibClient { QueueManagerInfo &qm = HRUN_CLIENT->server_config_.queue_manager_; std::vector queue_info = { {1, 1, qm.queue_depth_, 0}, - {1, 1, qm.queue_depth_, QUEUE_LONG_RUNNING}, + {1, 1, 4 * qm.queue_depth_, QUEUE_LONG_RUNNING}, // {qm.max_lanes_, qm.max_lanes_, qm.queue_depth_, QUEUE_LOW_LATENCY} - {1, 1, qm.queue_depth_, QUEUE_LOW_LATENCY}, + {1, 1, 4 * qm.queue_depth_, QUEUE_LOW_LATENCY}, }; return HRUN_ADMIN->AsyncCreateTaskState( task_node, domain_id, state_name, id_, queue_info); From 03224a326a541d24cb2d92a92a1a2ac9d9a9dfef Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Thu, 9 Nov 2023 05:12:52 -0600 Subject: [PATCH 063/114] Use even more queue depth --- .../hrun/queue_manager/queues/hshm_queue.h | 3 +- .../hrun/queue_manager/queues/mpsc_queue.h | 288 ++++++++++++++++++ .../include/remote_queue/remote_queue.h | 5 +- 3 files changed, 293 insertions(+), 3 deletions(-) create mode 100644 hrun/include/hrun/queue_manager/queues/mpsc_queue.h diff --git a/hrun/include/hrun/queue_manager/queues/hshm_queue.h b/hrun/include/hrun/queue_manager/queues/hshm_queue.h index 85a85aeb3..d4707c72f 100644 --- a/hrun/include/hrun/queue_manager/queues/hshm_queue.h +++ b/hrun/include/hrun/queue_manager/queues/hshm_queue.h @@ -6,6 +6,7 @@ #define HRUN_INCLUDE_HRUN_QUEUE_MANAGER_HSHM_QUEUE_H_ #include "hrun/queue_manager/queue.h" +#include "mpsc_queue.h" namespace hrun { @@ -24,7 +25,7 @@ struct LaneData { }; /** Represents a lane tasks can be stored */ -typedef hipc::mpsc_queue Lane; +typedef hrun::mpsc_queue Lane; /** Prioritization of different lanes in the queue */ struct LaneGroup : public PriorityInfo { diff --git a/hrun/include/hrun/queue_manager/queues/mpsc_queue.h b/hrun/include/hrun/queue_manager/queues/mpsc_queue.h new file mode 100644 index 000000000..c102d80fe --- /dev/null +++ b/hrun/include/hrun/queue_manager/queues/mpsc_queue.h @@ -0,0 +1,288 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * +* Distributed under BSD 3-Clause license. * +* Copyright by The HDF Group. * +* Copyright by the Illinois Institute of Technology. * +* All rights reserved. * +* * +* This file is part of Hermes. The full Hermes copyright notice, including * +* terms governing use, modification, and redistribution, is contained in * +* the COPYING file, which can be found at the top directory. If you do not * +* have access to the file, you may request a copy from help@hdfgroup.org. 
* +* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +#ifndef HRUN_INCLUDE_HRUN_DATA_STRUCTURES_IPC_mpsc_queue_H_ +#define HRUN_INCLUDE_HRUN_DATA_STRUCTURES_IPC_mpsc_queue_H_ + +#include "hermes_shm/data_structures/ipc/internal/shm_internal.h" +#include "hermes_shm/thread/lock.h" +#include "hermes_shm/data_structures/ipc/vector.h" +#include "hermes_shm/data_structures/ipc/pair.h" +#include "hermes_shm/types/qtok.h" + +namespace hrun { + +/** Forward declaration of mpsc_queue */ +template +class mpsc_queue; + +/** + * MACROS used to simplify the mpsc_queue namespace + * Used as inputs to the SHM_CONTAINER_TEMPLATE + * */ +#define CLASS_NAME mpsc_queue +#define TYPED_CLASS mpsc_queue +#define TYPED_HEADER ShmHeader> + +using hipc::ShmContainer; +using hipc::pair; +using hshm::_qtok_t; +using hshm::qtok_t; +using hipc::vector; +using hipc::ShmArchive; +using hipc::Allocator; +using hshm::bitfield32_t; +using hshm::make_argpack; + +/** + * A queue optimized for multiple producers (emplace) with a single + * consumer (pop). + * */ +template +class mpsc_queue : public ShmContainer { + public: + SHM_CONTAINER_TEMPLATE((CLASS_NAME), (TYPED_CLASS)) + ShmArchive>> queue_; + std::atomic<_qtok_t> tail_; + std::atomic<_qtok_t> head_; + bitfield32_t flags_; + + public: + /**==================================== + * Default Constructor + * ===================================*/ + + /** SHM constructor. Default. */ + explicit mpsc_queue(Allocator *alloc, + size_t depth = 1024) { + shm_init_container(alloc); + HSHM_MAKE_AR(queue_, GetAllocator(), depth); + flags_.Clear(); + SetNull(); + } + + /**==================================== + * Copy Constructors + * ===================================*/ + + /** SHM copy constructor */ + explicit mpsc_queue(Allocator *alloc, + const mpsc_queue &other) { + shm_init_container(alloc); + SetNull(); + shm_strong_copy_construct_and_op(other); + } + + /** SHM copy assignment operator */ + mpsc_queue& operator=(const mpsc_queue &other) { + if (this != &other) { + shm_destroy(); + shm_strong_copy_construct_and_op(other); + } + return *this; + } + + /** SHM copy constructor + operator main */ + void shm_strong_copy_construct_and_op(const mpsc_queue &other) { + head_ = other.head_.load(); + tail_ = other.tail_.load(); + (*queue_) = (*other.queue_); + } + + /**==================================== + * Move Constructors + * ===================================*/ + + /** SHM move constructor. */ + mpsc_queue(Allocator *alloc, + mpsc_queue &&other) noexcept { + shm_init_container(alloc); + if (GetAllocator() == other.GetAllocator()) { + head_ = other.head_.load(); + tail_ = other.tail_.load(); + (*queue_) = std::move(*other.queue_); + other.SetNull(); + } else { + shm_strong_copy_construct_and_op(other); + other.shm_destroy(); + } + } + + /** SHM move assignment operator. */ + mpsc_queue& operator=(mpsc_queue &&other) noexcept { + if (this != &other) { + shm_destroy(); + if (GetAllocator() == other.GetAllocator()) { + head_ = other.head_.load(); + tail_ = other.tail_.load(); + (*queue_) = std::move(*other.queue_); + other.SetNull(); + } else { + shm_strong_copy_construct_and_op(other); + other.shm_destroy(); + } + } + return *this; + } + + /**==================================== + * Destructor + * ===================================*/ + + /** SHM destructor. 
*/ + void shm_destroy_main() { + (*queue_).shm_destroy(); + } + + /** Check if the list is empty */ + bool IsNull() const { + return (*queue_).IsNull(); + } + + /** Sets this list as empty */ + void SetNull() { + head_ = 0; + tail_ = 0; + } + + /**==================================== + * MPSC Queue Methods + * ===================================*/ + + /** Construct an element at \a pos position in the list */ + template + qtok_t emplace(Args&&... args) { + // Allocate a slot in the queue + // The slot is marked NULL, so pop won't do anything if context switch + _qtok_t head = head_.load(); + _qtok_t tail = tail_.fetch_add(1); + size_t size = tail - head + 1; + vector> &queue = (*queue_); + + // Check if there's space in the queue. + if (size > queue.size()) { + while (true) { + head = head_.load(); + size = tail - head + 1; + if (size <= (*queue_).size()) { + break; + } + HERMES_THREAD_MODEL->Yield(); + } + } + + // Emplace into queue at our slot + uint32_t idx = tail % queue.size(); + auto iter = queue.begin() + idx; + queue.replace(iter, + hshm::PiecewiseConstruct(), + make_argpack(), + make_argpack(std::forward(args)...)); + + // Let pop know that the data is fully prepared + pair &entry = (*iter); + entry.GetFirst().SetBits(1); + return qtok_t(tail); + } + + public: + /** Consumer pops the head object */ + qtok_t pop(T &val) { + // Don't pop if there's no entries + _qtok_t head = head_.load(); + _qtok_t tail = tail_.load(); + if (head >= tail) { + return qtok_t::GetNull(); + } + + // Pop the element, but only if it's marked valid + _qtok_t idx = head % (*queue_).size(); + hipc::pair &entry = (*queue_)[idx]; + if (entry.GetFirst().Any(1)) { + val = std::move(entry.GetSecond()); + entry.GetFirst().Clear(); + head_.fetch_add(1); + return qtok_t(head); + } else { + return qtok_t::GetNull(); + } + } + + /** Consumer pops the head object */ + qtok_t pop() { + // Don't pop if there's no entries + _qtok_t head = head_.load(); + _qtok_t tail = tail_.load(); + if (head >= tail) { + return qtok_t::GetNull(); + } + + // Pop the element, but only if it's marked valid + _qtok_t idx = head % (*queue_).size(); + hipc::pair &entry = (*queue_)[idx]; + if (entry.GetFirst().Any(1)) { + entry.GetFirst().Clear(); + head_.fetch_add(1); + return qtok_t(head); + } else { + return qtok_t::GetNull(); + } + } + + /** Consumer peeks an object */ + qtok_t peek(T *&val, int off = 0) { + // Don't pop if there's no entries + _qtok_t head = head_.load() + off; + _qtok_t tail = tail_.load(); + if (head >= tail) { + return qtok_t::GetNull(); + } + + // Pop the element, but only if it's marked valid + _qtok_t idx = (head) % (*queue_).size(); + hipc::pair &entry = (*queue_)[idx]; + if (entry.GetFirst().Any(1)) { + val = &entry.GetSecond(); + return qtok_t(head); + } else { + return qtok_t::GetNull(); + } + } + + /** Consumer peeks an object */ + qtok_t peek(pair *&val, int off = 0) { + // Don't pop if there's no entries + _qtok_t head = head_.load() + off; + _qtok_t tail = tail_.load(); + if (head >= tail) { + return qtok_t::GetNull(); + } + + // Pop the element, but only if it's marked valid + _qtok_t idx = (head) % (*queue_).size(); + hipc::pair &entry = (*queue_)[idx]; + if (entry.GetFirst().Any(1)) { + val = &entry; + return qtok_t(head); + } else { + return qtok_t::GetNull(); + } + } +}; + +} // namespace hshm::ipc + +#undef CLASS_NAME +#undef TYPED_CLASS +#undef TYPED_HEADER + +#endif // HRUN_INCLUDE_HRUN_DATA_STRUCTURES_IPC_mpsc_queue_H_ diff --git 
a/hrun/tasks_required/remote_queue/include/remote_queue/remote_queue.h b/hrun/tasks_required/remote_queue/include/remote_queue/remote_queue.h index 2f557f277..52496e811 100644 --- a/hrun/tasks_required/remote_queue/include/remote_queue/remote_queue.h +++ b/hrun/tasks_required/remote_queue/include/remote_queue/remote_queue.h @@ -32,11 +32,12 @@ class Client : public TaskLibClient { const TaskStateId &state_id) { id_ = state_id; QueueManagerInfo &qm = HRUN_CLIENT->server_config_.queue_manager_; + // NOTE(llogan): 32x queue depth b/c default num rpc threads is 32 std::vector queue_info = { {1, 1, qm.queue_depth_, 0}, - {1, 1, 4 * qm.queue_depth_, QUEUE_LONG_RUNNING}, + {1, 1, 32 * qm.queue_depth_, QUEUE_LONG_RUNNING}, // {qm.max_lanes_, qm.max_lanes_, qm.queue_depth_, QUEUE_LOW_LATENCY} - {1, 1, 4 * qm.queue_depth_, QUEUE_LOW_LATENCY}, + {1, 1, 32 * qm.queue_depth_, QUEUE_LOW_LATENCY}, }; return HRUN_ADMIN->AsyncCreateTaskState( task_node, domain_id, state_name, id_, queue_info); From 4fc616d4c7324c16e10e9c5d09dc1581c69f5789 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Thu, 9 Nov 2023 05:15:54 -0600 Subject: [PATCH 064/114] Detect if queue is full at any point --- hrun/include/hrun/queue_manager/queues/mpsc_queue.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hrun/include/hrun/queue_manager/queues/mpsc_queue.h b/hrun/include/hrun/queue_manager/queues/mpsc_queue.h index c102d80fe..3f3bc4b66 100644 --- a/hrun/include/hrun/queue_manager/queues/mpsc_queue.h +++ b/hrun/include/hrun/queue_manager/queues/mpsc_queue.h @@ -18,6 +18,7 @@ #include "hermes_shm/data_structures/ipc/vector.h" #include "hermes_shm/data_structures/ipc/pair.h" #include "hermes_shm/types/qtok.h" +#include "hrun/hrun_types.h" namespace hrun { @@ -170,6 +171,7 @@ class mpsc_queue : public ShmContainer { // Check if there's space in the queue. 
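      // Backpressure: tail_ was fetch-added above, so this producer already
      // owns slot `tail`; if that slot is not yet within queue.size() entries
      // of head_, the loop below re-reads head_ and yields until the consumer
      // pops enough entries for the reserved slot to fit in the ring.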
if (size > queue.size()) { + HILOG(kInfo, "Queue is full, waiting for space") while (true) { head = head_.load(); size = tail - head + 1; From 41bd80bd29de517f57bbf0f7d27b2ab45c4e7bf1 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Thu, 9 Nov 2023 05:17:55 -0600 Subject: [PATCH 065/114] Use data alloc and main alloc again --- hrun/include/hrun/api/hrun_client.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hrun/include/hrun/api/hrun_client.h b/hrun/include/hrun/api/hrun_client.h index 8416246b1..07e3238b9 100644 --- a/hrun/include/hrun/api/hrun_client.h +++ b/hrun/include/hrun/api/hrun_client.h @@ -244,7 +244,7 @@ class Client : public ConfigurationManager { HSHM_ALWAYS_INLINE LPointer AllocateBufferServer(size_t size, Task *yield_task) { LPointer p; - p = main_alloc_->AllocateLocalPtr(size); + p = data_alloc_->AllocateLocalPtr(size); return p; } @@ -253,7 +253,7 @@ class Client : public ConfigurationManager { HSHM_ALWAYS_INLINE LPointer AllocateBufferServer(size_t size) { LPointer p; - p = main_alloc_->AllocateLocalPtr(size); + p = data_alloc_->AllocateLocalPtr(size); return p; } From b1e62f9d469f75f89d77c96683ce33d50f8c6b1b Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Thu, 9 Nov 2023 06:27:46 -0600 Subject: [PATCH 066/114] Make proc queue more limited --- hrun/tasks_required/proc_queue/include/proc_queue/proc_queue.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hrun/tasks_required/proc_queue/include/proc_queue/proc_queue.h b/hrun/tasks_required/proc_queue/include/proc_queue/proc_queue.h index 1db95bc66..948fc7464 100644 --- a/hrun/tasks_required/proc_queue/include/proc_queue/proc_queue.h +++ b/hrun/tasks_required/proc_queue/include/proc_queue/proc_queue.h @@ -39,7 +39,8 @@ class Client : public TaskLibClient { std::vector queue_info = { {1, 1, qm.queue_depth_, 0}, {1, 1, qm.queue_depth_, QUEUE_LONG_RUNNING}, - {qm.max_lanes_, qm.max_lanes_, qm.queue_depth_, QUEUE_LOW_LATENCY} + // TODO(llogan): Specify different depth for proc queue + {qm.max_lanes_, qm.max_lanes_, 16, QUEUE_LOW_LATENCY} }; return HRUN_ADMIN->AsyncCreateTaskState( task_node, domain_id, state_name, id_, queue_info); From 0848fae11af3122715cfeef90f995d6c6027e8b3 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Thu, 9 Nov 2023 06:38:01 -0600 Subject: [PATCH 067/114] Pass queue id --- hrun/include/hrun/queue_manager/queues/hshm_queue.h | 2 +- hrun/include/hrun/queue_manager/queues/mpsc_queue.h | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/hrun/include/hrun/queue_manager/queues/hshm_queue.h b/hrun/include/hrun/queue_manager/queues/hshm_queue.h index d4707c72f..61a23cfbb 100644 --- a/hrun/include/hrun/queue_manager/queues/hshm_queue.h +++ b/hrun/include/hrun/queue_manager/queues/hshm_queue.h @@ -128,7 +128,7 @@ struct MultiQueueT : public hipc::ShmContainer { lane_group.lanes_->reserve(prio_info.max_lanes_); lane_group.prio_ = prio; for (u32 lane_id = 0; lane_id < lane_group.num_lanes_; ++lane_id) { - lane_group.lanes_->emplace_back(lane_group.depth_); + lane_group.lanes_->emplace_back(lane_group.depth_, id_); Lane &lane = lane_group.lanes_->back(); lane.flags_ = prio_info.flags_; } diff --git a/hrun/include/hrun/queue_manager/queues/mpsc_queue.h b/hrun/include/hrun/queue_manager/queues/mpsc_queue.h index 3f3bc4b66..7b403b435 100644 --- a/hrun/include/hrun/queue_manager/queues/mpsc_queue.h +++ b/hrun/include/hrun/queue_manager/queues/mpsc_queue.h @@ -56,6 +56,7 @@ class mpsc_queue : public ShmContainer { std::atomic<_qtok_t> tail_; 
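  // Producers reserve slots by fetch-adding tail_; only the single consumer
  // (pop/peek) advances head_.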
std::atomic<_qtok_t> head_; bitfield32_t flags_; + QueueId id_; public: /**==================================== @@ -64,10 +65,12 @@ class mpsc_queue : public ShmContainer { /** SHM constructor. Default. */ explicit mpsc_queue(Allocator *alloc, - size_t depth = 1024) { + size_t depth = 1024, + QueueId id = QueueId::GetNull()) { shm_init_container(alloc); HSHM_MAKE_AR(queue_, GetAllocator(), depth); flags_.Clear(); + id_ = id; SetNull(); } @@ -171,7 +174,7 @@ class mpsc_queue : public ShmContainer { // Check if there's space in the queue. if (size > queue.size()) { - HILOG(kInfo, "Queue is full, waiting for space") + HILOG(kInfo, "Queue {}/{} is full, waiting for space", id_, queue_->size()); while (true) { head = head_.load(); size = tail - head + 1; From 0b2ccf90c2deafe2d1c089438fd748fea792b545 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Thu, 9 Nov 2023 06:52:49 -0600 Subject: [PATCH 068/114] Print queue --- .../worch_queue_round_robin/src/worch_queue_round_robin.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hrun/tasks_required/worch_queue_round_robin/src/worch_queue_round_robin.cc b/hrun/tasks_required/worch_queue_round_robin/src/worch_queue_round_robin.cc index 6296095c0..4dfa3cc11 100644 --- a/hrun/tasks_required/worch_queue_round_robin/src/worch_queue_round_robin.cc +++ b/hrun/tasks_required/worch_queue_round_robin/src/worch_queue_round_robin.cc @@ -62,7 +62,7 @@ class Server : public TaskLib { u32 off_highlat = off_lowlat + count_lowlat; u32 count_highlat = rem_workers; for (u32 lane_id = lane_group.num_scheduled_; lane_id < lane_group.num_lanes_; ++lane_id) { - // HILOG(kDebug, "Scheduling the queue {} (lane {})", queue.id_, lane_id); + HILOG(kDebug, "Scheduling the queue {} (lane {})", queue.id_, lane_id); if (lane_group.IsLowLatency()) { u32 worker_id = (count_lowlat_ % count_lowlat) + off_lowlat; count_lowlat_ += 1; From 6008250a9ab6c8af995aaf121abaccd4557fe22a Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Thu, 9 Nov 2023 07:57:40 -0600 Subject: [PATCH 069/114] Better debug for schedule --- .../worch_queue_round_robin/src/worch_queue_round_robin.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hrun/tasks_required/worch_queue_round_robin/src/worch_queue_round_robin.cc b/hrun/tasks_required/worch_queue_round_robin/src/worch_queue_round_robin.cc index 4dfa3cc11..9b457cac6 100644 --- a/hrun/tasks_required/worch_queue_round_robin/src/worch_queue_round_robin.cc +++ b/hrun/tasks_required/worch_queue_round_robin/src/worch_queue_round_robin.cc @@ -62,17 +62,18 @@ class Server : public TaskLib { u32 off_highlat = off_lowlat + count_lowlat; u32 count_highlat = rem_workers; for (u32 lane_id = lane_group.num_scheduled_; lane_id < lane_group.num_lanes_; ++lane_id) { - HILOG(kDebug, "Scheduling the queue {} (lane {})", queue.id_, lane_id); if (lane_group.IsLowLatency()) { u32 worker_id = (count_lowlat_ % count_lowlat) + off_lowlat; count_lowlat_ += 1; Worker &worker = *HRUN_WORK_ORCHESTRATOR->workers_[worker_id]; worker.PollQueues({WorkEntry(lane_group.prio_, lane_id, &queue)}); + HILOG(kDebug, "Scheduling the queue {} (lane {}, worker {})", queue.id_, lane_id, worker_id); } else { u32 worker_id = (count_highlat_ % count_highlat) + off_highlat; count_highlat_ += 1; Worker &worker = *HRUN_WORK_ORCHESTRATOR->workers_[worker_id]; worker.PollQueues({WorkEntry(lane_group.prio_, lane_id, &queue)}); + HILOG(kDebug, "Scheduling the queue {} (lane {}, worker {})", queue.id_, lane_id, worker_id); } } lane_group.num_scheduled_ = 
lane_group.num_lanes_; From 37d0b897e62fd5d17a923f84463cb475410775ab Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Thu, 9 Nov 2023 08:02:11 -0600 Subject: [PATCH 070/114] Partition low and high-priority queues --- .../src/worch_queue_round_robin.cc | 9 --------- 1 file changed, 9 deletions(-) diff --git a/hrun/tasks_required/worch_queue_round_robin/src/worch_queue_round_robin.cc b/hrun/tasks_required/worch_queue_round_robin/src/worch_queue_round_robin.cc index 9b457cac6..e93b85d5a 100644 --- a/hrun/tasks_required/worch_queue_round_robin/src/worch_queue_round_robin.cc +++ b/hrun/tasks_required/worch_queue_round_robin/src/worch_queue_round_robin.cc @@ -46,15 +46,6 @@ class Server : public TaskLib { continue; } for (LaneGroup &lane_group : *queue.groups_) { - // NOTE(llogan): Assumes a minimum of three workers, admin on worker 0. - if (lane_group.IsLowPriority()) { - for (u32 lane_id = lane_group.num_scheduled_; lane_id < lane_group.num_lanes_; ++lane_id) { - // HILOG(kDebug, "Scheduling the queue {} (lane {})", queue.id_, lane_id); - Worker &worker = *HRUN_WORK_ORCHESTRATOR->workers_[0]; - worker.PollQueues({WorkEntry(lane_group.prio_, lane_id, &queue)}); - } - lane_group.num_scheduled_ = lane_group.num_lanes_; - } else { u32 rem_workers = HRUN_WORK_ORCHESTRATOR->workers_.size() - 1; u32 off_lowlat = 1; u32 count_lowlat = rem_workers / 2; From 87a09567ae87e55b0caf29541ccc20972ef98a2b Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Thu, 9 Nov 2023 08:05:45 -0600 Subject: [PATCH 071/114] Make high latency queues first --- .../src/worch_queue_round_robin.cc | 40 +++++++++---------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/hrun/tasks_required/worch_queue_round_robin/src/worch_queue_round_robin.cc b/hrun/tasks_required/worch_queue_round_robin/src/worch_queue_round_robin.cc index e93b85d5a..6bb2a057a 100644 --- a/hrun/tasks_required/worch_queue_round_robin/src/worch_queue_round_robin.cc +++ b/hrun/tasks_required/worch_queue_round_robin/src/worch_queue_round_robin.cc @@ -46,29 +46,27 @@ class Server : public TaskLib { continue; } for (LaneGroup &lane_group : *queue.groups_) { - u32 rem_workers = HRUN_WORK_ORCHESTRATOR->workers_.size() - 1; - u32 off_lowlat = 1; - u32 count_lowlat = rem_workers / 2; - rem_workers -= count_lowlat; - u32 off_highlat = off_lowlat + count_lowlat; - u32 count_highlat = rem_workers; - for (u32 lane_id = lane_group.num_scheduled_; lane_id < lane_group.num_lanes_; ++lane_id) { - if (lane_group.IsLowLatency()) { - u32 worker_id = (count_lowlat_ % count_lowlat) + off_lowlat; - count_lowlat_ += 1; - Worker &worker = *HRUN_WORK_ORCHESTRATOR->workers_[worker_id]; - worker.PollQueues({WorkEntry(lane_group.prio_, lane_id, &queue)}); - HILOG(kDebug, "Scheduling the queue {} (lane {}, worker {})", queue.id_, lane_id, worker_id); - } else { - u32 worker_id = (count_highlat_ % count_highlat) + off_highlat; - count_highlat_ += 1; - Worker &worker = *HRUN_WORK_ORCHESTRATOR->workers_[worker_id]; - worker.PollQueues({WorkEntry(lane_group.prio_, lane_id, &queue)}); - HILOG(kDebug, "Scheduling the queue {} (lane {}, worker {})", queue.id_, lane_id, worker_id); - } + u32 rem_workers = HRUN_WORK_ORCHESTRATOR->workers_.size(); + u32 count_highlat = rem_workers / 2; + rem_workers -= count_highlat; + u32 off_lowlat = count_highlat; + u32 count_lowlat = rem_workers; + for (u32 lane_id = lane_group.num_scheduled_; lane_id < lane_group.num_lanes_; ++lane_id) { + if (lane_group.IsLowLatency()) { + u32 worker_id = (count_lowlat_ % count_lowlat) + off_lowlat; + 
count_lowlat_ += 1; + Worker &worker = *HRUN_WORK_ORCHESTRATOR->workers_[worker_id]; + worker.PollQueues({WorkEntry(lane_group.prio_, lane_id, &queue)}); + HILOG(kDebug, "Scheduling the queue {} (lane {}, worker {})", queue.id_, lane_id, worker_id); + } else { + u32 worker_id = (count_highlat_ % count_highlat); + count_highlat_ += 1; + Worker &worker = *HRUN_WORK_ORCHESTRATOR->workers_[worker_id]; + worker.PollQueues({WorkEntry(lane_group.prio_, lane_id, &queue)}); + HILOG(kDebug, "Scheduling the queue {} (lane {}, worker {})", queue.id_, lane_id, worker_id); } - lane_group.num_scheduled_ = lane_group.num_lanes_; } + lane_group.num_scheduled_ = lane_group.num_lanes_; } } } From a489b0c52e1a0c4a127c92dce5f8a974c4abf48b Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Thu, 9 Nov 2023 09:27:01 -0600 Subject: [PATCH 072/114] Use main alloc instead of data alloc --- hrun/include/hrun/api/hrun_client.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hrun/include/hrun/api/hrun_client.h b/hrun/include/hrun/api/hrun_client.h index 07e3238b9..8416246b1 100644 --- a/hrun/include/hrun/api/hrun_client.h +++ b/hrun/include/hrun/api/hrun_client.h @@ -244,7 +244,7 @@ class Client : public ConfigurationManager { HSHM_ALWAYS_INLINE LPointer AllocateBufferServer(size_t size, Task *yield_task) { LPointer p; - p = data_alloc_->AllocateLocalPtr(size); + p = main_alloc_->AllocateLocalPtr(size); return p; } @@ -253,7 +253,7 @@ class Client : public ConfigurationManager { HSHM_ALWAYS_INLINE LPointer AllocateBufferServer(size_t size) { LPointer p; - p = data_alloc_->AllocateLocalPtr(size); + p = main_alloc_->AllocateLocalPtr(size); return p; } From 583c34d7558e931d0d38ec0b90351dab5e5cb25c Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Thu, 9 Nov 2023 09:52:16 -0600 Subject: [PATCH 073/114] Local flush test --- test/unit/hermes/test_bucket.cc | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/test/unit/hermes/test_bucket.cc b/test/unit/hermes/test_bucket.cc index 71449d3a6..aae827175 100644 --- a/test/unit/hermes/test_bucket.cc +++ b/test/unit/hermes/test_bucket.cc @@ -120,6 +120,36 @@ TEST_CASE("TestHermesAsyncPut") { HRUN_ADMIN->FlushRoot(DomainId::GetGlobal()); } +TEST_CASE("TestHermesAsyncPutLocalFlush") { + int rank, nprocs; + MPI_Barrier(MPI_COMM_WORLD); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &nprocs); + + // Initialize Hermes on all nodes + HERMES->ClientInit(); + + // Create a bucket + hermes::Context ctx; + hermes::Bucket bkt("hello"); + + size_t count_per_proc = 256; + size_t off = rank * count_per_proc; + size_t proc_count = off + count_per_proc; + for (size_t i = off; i < proc_count; ++i) { + HILOG(kInfo, "Iteration: {}", i); + // Put a blob + hermes::Blob blob(MEGABYTES(1)); + memset(blob.data(), i % 256, blob.size()); + bkt.AsyncPut(std::to_string(i), blob, ctx); + } + MPI_Barrier(MPI_COMM_WORLD); + if (rank == 0) { + HRUN_ADMIN->FlushRoot(DomainId::GetLocal()); + } + MPI_Barrier(MPI_COMM_WORLD); +} + TEST_CASE("TestHermesPutGet") { int rank, nprocs; MPI_Barrier(MPI_COMM_WORLD); From e28f59a4a51e21a3dcf8d70db4826ed00ab61866 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Thu, 9 Nov 2023 23:27:05 -0600 Subject: [PATCH 074/114] Use yield instead of flush --- hrun/tasks_required/hrun_admin/include/hrun_admin/hrun_admin.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hrun/tasks_required/hrun_admin/include/hrun_admin/hrun_admin.h 
b/hrun/tasks_required/hrun_admin/include/hrun_admin/hrun_admin.h index 40fc424c9..a6718df88 100644 --- a/hrun/tasks_required/hrun_admin/include/hrun_admin/hrun_admin.h +++ b/hrun/tasks_required/hrun_admin/include/hrun_admin/hrun_admin.h @@ -240,7 +240,8 @@ class Client : public TaskLibClient { if (!p.shm_.IsNull()) { break; } - FlushRoot(DomainId::GetLocal()); + // FlushRoot(DomainId::GetLocal()); + HRUN_CLIENT->Yield(); // HILOG(kInfo, "{} Could not allocate buffer of size {} (1)?", THREAD_MODEL, size); } return p; From 18227044c2a7be5b88b416d2784e3ed59e3287c0 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Thu, 9 Nov 2023 23:33:37 -0600 Subject: [PATCH 075/114] Make heap size debug mode --- hrun/include/hrun/api/hrun_client.h | 4 ++-- hrun/include/hrun/queue_manager/queues/mpsc_queue.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/hrun/include/hrun/api/hrun_client.h b/hrun/include/hrun/api/hrun_client.h index 8416246b1..2587f0d97 100644 --- a/hrun/include/hrun/api/hrun_client.h +++ b/hrun/include/hrun/api/hrun_client.h @@ -262,7 +262,7 @@ class Client : public ConfigurationManager { void FreeBuffer(hipc::Pointer &p) { auto alloc = HERMES_MEMORY_MANAGER->GetAllocator(p.allocator_id_); alloc->Free(p); - HILOG(kInfo, "Heap size (1) for {}/{}: {}", + HILOG(kDebug, "Heap size (1) for {}/{}: {}", p.allocator_id_.bits_.major_, p.allocator_id_.bits_.minor_, data_alloc_->GetCurrentlyAllocatedSize()); @@ -273,7 +273,7 @@ class Client : public ConfigurationManager { void FreeBuffer(LPointer &p) { auto alloc = HERMES_MEMORY_MANAGER->GetAllocator(p.shm_.allocator_id_); alloc->FreeLocalPtr(p); - HILOG(kInfo, "Heap size (2) for {}/{}: {}", + HILOG(kDebug, "Heap size (2) for {}/{}: {}", alloc->GetId().bits_.major_, alloc->GetId().bits_.minor_, data_alloc_->GetCurrentlyAllocatedSize()); diff --git a/hrun/include/hrun/queue_manager/queues/mpsc_queue.h b/hrun/include/hrun/queue_manager/queues/mpsc_queue.h index 7b403b435..d83883a6a 100644 --- a/hrun/include/hrun/queue_manager/queues/mpsc_queue.h +++ b/hrun/include/hrun/queue_manager/queues/mpsc_queue.h @@ -174,7 +174,7 @@ class mpsc_queue : public ShmContainer { // Check if there's space in the queue. 
if (size > queue.size()) { - HILOG(kInfo, "Queue {}/{} is full, waiting for space", id_, queue_->size()); + HILOG(kDebug, "Queue {}/{} is full, waiting for space", id_, queue_->size()); while (true) { head = head_.load(); size = tail - head + 1; From f7a4219c5097e9f4b468be5163260fc0e848ccfe Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Fri, 10 Nov 2023 00:06:45 -0600 Subject: [PATCH 076/114] Make queue being full info --- hrun/config/hrun_server_default.yaml | 5 +++-- hrun/include/hrun/queue_manager/queues/mpsc_queue.h | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/hrun/config/hrun_server_default.yaml b/hrun/config/hrun_server_default.yaml index 998d2ed9b..44a3e878c 100644 --- a/hrun/config/hrun_server_default.yaml +++ b/hrun/config/hrun_server_default.yaml @@ -1,7 +1,8 @@ ### Runtime orchestration settings work_orchestrator: - # The number of worker threads to spawn - max_workers: 4 + # The max number of dedicated worker threads + max_dworkers: 4 + # The max number of overlapping threads ### Queue Manager settings queue_manager: diff --git a/hrun/include/hrun/queue_manager/queues/mpsc_queue.h b/hrun/include/hrun/queue_manager/queues/mpsc_queue.h index d83883a6a..642a0987c 100644 --- a/hrun/include/hrun/queue_manager/queues/mpsc_queue.h +++ b/hrun/include/hrun/queue_manager/queues/mpsc_queue.h @@ -174,7 +174,7 @@ class mpsc_queue : public ShmContainer { // Check if there's space in the queue. if (size > queue.size()) { - HILOG(kDebug, "Queue {}/{} is full, waiting for space", id_, queue_->size()); + HILOG(kInfo, "Queue {}/{} is full, waiting for space", id_, queue_->size()); while (true) { head = head_.load(); size = tail - head + 1; @@ -183,6 +183,7 @@ class mpsc_queue : public ShmContainer { } HERMES_THREAD_MODEL->Yield(); } + HILOG(kInfo, "Queue {}/{} got scheduled", id_, queue_->size()); } // Emplace into queue at our slot From 27a8f943fb321bfaa4d5ae97038d8af3044b8d21 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Fri, 10 Nov 2023 00:37:15 -0600 Subject: [PATCH 077/114] Change work sched policy --- config/hermes_server_default.yaml | 8 ++++++-- hrun/config/hrun_server_default.yaml | 3 +++ hrun/include/hrun/config/config_server.h | 8 ++++++-- .../include/hrun/config/config_server_default.h | 8 ++++++-- .../hrun/work_orchestrator/work_orchestrator.h | 2 ++ hrun/include/hrun/work_orchestrator/worker.h | 10 ++++++++-- hrun/src/config_server.cc | 10 ++++++++-- hrun/src/work_orchestrator.cc | 15 +++++++++++++-- .../src/worch_queue_round_robin.cc | 17 ++++++----------- include/hermes/config_server_default.h | 8 ++++++-- test/unit/hermes/config/hermes_server.yaml | 5 +++-- 11 files changed, 67 insertions(+), 27 deletions(-) diff --git a/config/hermes_server_default.yaml b/config/hermes_server_default.yaml index ee9aa3e3e..8a25d43f9 100644 --- a/config/hermes_server_default.yaml +++ b/config/hermes_server_default.yaml @@ -134,8 +134,12 @@ system_view_state_update_interval_ms: 1000 ### Runtime orchestration settings work_orchestrator: - # The number of worker threads to spawn - max_workers: 4 + # The max number of dedicated worker threads + max_dworkers: 4 + # The max number of overlapping threads + max_oworkers: 32 + # The max number of total dedicated cores + owork_per_core: 32 ### Queue Manager settings queue_manager: diff --git a/hrun/config/hrun_server_default.yaml b/hrun/config/hrun_server_default.yaml index 44a3e878c..c4578eb36 100644 --- a/hrun/config/hrun_server_default.yaml +++ b/hrun/config/hrun_server_default.yaml @@ -3,6 +3,9 @@ work_orchestrator: 
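# dworkers busy-poll their lanes on dedicated cores; oworkers yield between
# polls and share cores, with owork_per_core of them packed onto each core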
# The max number of dedicated worker threads max_dworkers: 4 # The max number of overlapping threads + max_oworkers: 32 + # The max number of total dedicated cores + owork_per_core: 32 ### Queue Manager settings queue_manager: diff --git a/hrun/include/hrun/config/config_server.h b/hrun/include/hrun/config/config_server.h index 6651a852a..bdda384a1 100644 --- a/hrun/include/hrun/config/config_server.h +++ b/hrun/include/hrun/config/config_server.h @@ -22,8 +22,12 @@ namespace hrun::config { * Work orchestrator information defined in server config * */ struct WorkOrchestratorInfo { - /** Maximum number of workers to spawn */ - size_t max_workers_; + /** Maximum number of dedicated workers */ + size_t max_dworkers_; + /** Maximum number of overlapping workers */ + size_t max_oworkers_; + /** Overlapped workers per core */ + size_t owork_per_core_; }; /** diff --git a/hrun/include/hrun/config/config_server_default.h b/hrun/include/hrun/config/config_server_default.h index 6b57c8cf7..5e3b8287e 100644 --- a/hrun/include/hrun/config/config_server_default.h +++ b/hrun/include/hrun/config/config_server_default.h @@ -3,8 +3,12 @@ const inline char* kHrunServerDefaultConfigStr = "### Runtime orchestration settings\n" "work_orchestrator:\n" -" # The number of worker threads to spawn\n" -" max_workers: 4\n" +" # The max number of dedicated worker threads\n" +" max_dworkers: 4\n" +" # The max number of overlapping threads\n" +" max_oworkers: 32\n" +" # The max number of total dedicated cores\n" +" owork_per_core: 32\n" "\n" "### Queue Manager settings\n" "queue_manager:\n" diff --git a/hrun/include/hrun/work_orchestrator/work_orchestrator.h b/hrun/include/hrun/work_orchestrator/work_orchestrator.h index f7c1574e9..569c0293e 100644 --- a/hrun/include/hrun/work_orchestrator/work_orchestrator.h +++ b/hrun/include/hrun/work_orchestrator/work_orchestrator.h @@ -26,6 +26,8 @@ class WorkOrchestrator { public: ServerConfig *config_; /**< The server configuration */ std::vector> workers_; /**< Workers execute tasks */ + std::vector dworkers_; /**< Core-dedicated workers */ + std::vector oworkers_; /**< Undedicated workers */ std::atomic stop_runtime_; /**< Begin killing the runtime */ std::atomic kill_requested_; /**< Kill flushing threads eventually */ ABT_xstream xstream_; diff --git a/hrun/include/hrun/work_orchestrator/worker.h b/hrun/include/hrun/work_orchestrator/worker.h index 9d99c64cf..0b4862e58 100644 --- a/hrun/include/hrun/work_orchestrator/worker.h +++ b/hrun/include/hrun/work_orchestrator/worker.h @@ -173,7 +173,6 @@ class Worker { relinquish_queues_.Resize(1024); id_ = id; sleep_us_ = 0; - EnableContinuousPolling(); retries_ = 1; pid_ = 0; thread_ = std::make_unique(&Worker::Loop, this); @@ -262,6 +261,11 @@ class Worker { flags_.UnsetBits(WORKER_CONTINUOUS_POLLING); } + /** Check if continuously polling */ + bool IsContinuousPolling() { + return flags_.Any(WORKER_CONTINUOUS_POLLING); + } + /** Set the CPU affinity of this worker */ void SetCpuAffinity(int cpu_id) { ProcessAffiner::SetCpuAffinity(pid_, cpu_id); @@ -297,7 +301,9 @@ class Worker { } catch (hshm::Error &e) { HELOG(kFatal, "(node {}) Worker {} caught an error: {}", HRUN_CLIENT->node_id_, id_, e.what()); } - // Yield(); + if (!IsContinuousPolling()) { + Yield(); + } } Run(); } diff --git a/hrun/src/config_server.cc b/hrun/src/config_server.cc index f6a21c33a..108df2649 100644 --- a/hrun/src/config_server.cc +++ b/hrun/src/config_server.cc @@ -23,8 +23,14 @@ namespace hrun::config { /** parse work orchestrator info from YAML config */ 
void ServerConfig::ParseWorkOrchestrator(YAML::Node yaml_conf) { - if (yaml_conf["max_workers"]) { - wo_.max_workers_ = yaml_conf["max_workers"].as(); + if (yaml_conf["max_dworkers"]) { + wo_.max_dworkers_ = yaml_conf["max_dworkers"].as(); + } + if (yaml_conf["max_oworkers"]) { + wo_.max_oworkers_ = yaml_conf["max_oworkers"].as(); + } + if (yaml_conf["owork_per_core"]) { + wo_.owork_per_core_ = yaml_conf["owork_per_core"].as(); } } diff --git a/hrun/src/work_orchestrator.cc b/hrun/src/work_orchestrator.cc index ec9294f6a..49bdfee4c 100644 --- a/hrun/src/work_orchestrator.cc +++ b/hrun/src/work_orchestrator.cc @@ -28,12 +28,23 @@ void WorkOrchestrator::ServerInit(ServerConfig *config, QueueManager &qm) { } // Spawn workers on the stream - size_t num_workers = config_->wo_.max_workers_; + size_t num_workers = config_->wo_.max_dworkers_ + config->wo_.max_oworkers_; workers_.reserve(num_workers); - for (u32 worker_id = 0; worker_id < num_workers; ++worker_id) { + int worker_id = 0; + u32 last_cpu = config_->wo_.max_dworkers_; + for (; worker_id < config_->wo_.max_dworkers_; ++worker_id) { workers_.emplace_back(std::make_unique(worker_id, xstream_)); Worker &worker = *workers_.back(); worker.SetCpuAffinity(worker_id % HERMES_SYSTEM_INFO->ncpu_); + worker.EnableContinuousPolling(); + dworkers_.emplace_back(&worker); + } + for (; worker_id < num_workers; ++worker_id) { + workers_.emplace_back(std::make_unique(worker_id, xstream_)); + Worker &worker = *workers_.back(); + worker.SetCpuAffinity((int)(last_cpu + worker_id / config->wo_.owork_per_core_)); + worker.DisableContinuousPolling(); + oworkers_.emplace_back(&worker); } stop_runtime_ = false; kill_requested_ = false; diff --git a/hrun/tasks_required/worch_queue_round_robin/src/worch_queue_round_robin.cc b/hrun/tasks_required/worch_queue_round_robin/src/worch_queue_round_robin.cc index 6bb2a057a..e94c8b85b 100644 --- a/hrun/tasks_required/worch_queue_round_robin/src/worch_queue_round_robin.cc +++ b/hrun/tasks_required/worch_queue_round_robin/src/worch_queue_round_robin.cc @@ -46,24 +46,19 @@ class Server : public TaskLib { continue; } for (LaneGroup &lane_group : *queue.groups_) { - u32 rem_workers = HRUN_WORK_ORCHESTRATOR->workers_.size(); - u32 count_highlat = rem_workers / 2; - rem_workers -= count_highlat; - u32 off_lowlat = count_highlat; - u32 count_lowlat = rem_workers; for (u32 lane_id = lane_group.num_scheduled_; lane_id < lane_group.num_lanes_; ++lane_id) { if (lane_group.IsLowLatency()) { - u32 worker_id = (count_lowlat_ % count_lowlat) + off_lowlat; + u32 worker_off = count_lowlat_ % HRUN_WORK_ORCHESTRATOR->dworkers_.size(); count_lowlat_ += 1; - Worker &worker = *HRUN_WORK_ORCHESTRATOR->workers_[worker_id]; + Worker &worker = *HRUN_WORK_ORCHESTRATOR->dworkers_[worker_off]; worker.PollQueues({WorkEntry(lane_group.prio_, lane_id, &queue)}); - HILOG(kDebug, "Scheduling the queue {} (lane {}, worker {})", queue.id_, lane_id, worker_id); + HILOG(kDebug, "Scheduling the queue {} (lane {}, worker {})", queue.id_, lane_id, worker.id_); } else { - u32 worker_id = (count_highlat_ % count_highlat); + u32 worker_off = count_highlat_ % HRUN_WORK_ORCHESTRATOR->oworkers_.size(); count_highlat_ += 1; - Worker &worker = *HRUN_WORK_ORCHESTRATOR->workers_[worker_id]; + Worker &worker = *HRUN_WORK_ORCHESTRATOR->oworkers_[worker_off]; worker.PollQueues({WorkEntry(lane_group.prio_, lane_id, &queue)}); - HILOG(kDebug, "Scheduling the queue {} (lane {}, worker {})", queue.id_, lane_id, worker_id); + HILOG(kDebug, "Scheduling the queue {} (lane {}, worker 
{})", queue.id_, lane_id, worker_off); } } lane_group.num_scheduled_ = lane_group.num_lanes_; diff --git a/include/hermes/config_server_default.h b/include/hermes/config_server_default.h index 90ade21ad..203508ef0 100644 --- a/include/hermes/config_server_default.h +++ b/include/hermes/config_server_default.h @@ -137,8 +137,12 @@ const inline char* kHermesServerDefaultConfigStr = "\n" "### Runtime orchestration settings\n" "work_orchestrator:\n" -" # The number of worker threads to spawn\n" -" max_workers: 4\n" +" # The max number of dedicated worker threads\n" +" max_dworkers: 4\n" +" # The max number of overlapping threads\n" +" max_oworkers: 32\n" +" # The max number of total dedicated cores\n" +" owork_per_core: 32\n" "\n" "### Queue Manager settings\n" "queue_manager:\n" diff --git a/test/unit/hermes/config/hermes_server.yaml b/test/unit/hermes/config/hermes_server.yaml index a31c0c7d6..14cf03b81 100644 --- a/test/unit/hermes/config/hermes_server.yaml +++ b/test/unit/hermes/config/hermes_server.yaml @@ -90,8 +90,9 @@ system_view_state_update_interval_ms: 1000 ### Runtime orchestration settings work_orchestrator: - # The number of worker threads to spawn - max_workers: 4 + max_dworkers: 4 + max_oworkers: 32 + owork_per_core: 32 ### Queue Manager settings queue_manager: From 472339bef98037e4647659387963c6f372f2be08 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Fri, 10 Nov 2023 00:47:04 -0600 Subject: [PATCH 078/114] Print worker affinities --- hrun/include/hrun/work_orchestrator/worker.h | 1 + 1 file changed, 1 insertion(+) diff --git a/hrun/include/hrun/work_orchestrator/worker.h b/hrun/include/hrun/work_orchestrator/worker.h index 0b4862e58..5aca2f575 100644 --- a/hrun/include/hrun/work_orchestrator/worker.h +++ b/hrun/include/hrun/work_orchestrator/worker.h @@ -269,6 +269,7 @@ class Worker { /** Set the CPU affinity of this worker */ void SetCpuAffinity(int cpu_id) { ProcessAffiner::SetCpuAffinity(pid_, cpu_id); + HILOG(kInfo, "Affining worker {} to {}", id_, cpu_id); } /** Worker yields for a period of time */ From ad2725343996d2d3e13733b3da79e11c6618c162 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Fri, 10 Nov 2023 01:03:15 -0600 Subject: [PATCH 079/114] Add high latency priority --- hrun/include/hrun/task_registry/task.h | 1 + tasks/bdev/include/bdev/bdev.h | 1 + tasks/bdev/include/bdev/bdev_tasks.h | 6 +++++- 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/hrun/include/hrun/task_registry/task.h b/hrun/include/hrun/task_registry/task.h index ddd3a0115..6c8fb1f43 100644 --- a/hrun/include/hrun/task_registry/task.h +++ b/hrun/include/hrun/task_registry/task.h @@ -232,6 +232,7 @@ class TaskPrio { TASK_PRIO_T kAdmin = 0; TASK_PRIO_T kLongRunning = 1; TASK_PRIO_T kLowLatency = 2; + TASK_PRIO_T kHighLatency = 3; }; diff --git a/tasks/bdev/include/bdev/bdev.h b/tasks/bdev/include/bdev/bdev.h index 873912406..9e4b89490 100644 --- a/tasks/bdev/include/bdev/bdev.h +++ b/tasks/bdev/include/bdev/bdev.h @@ -52,6 +52,7 @@ class Client : public TaskLibClient { std::vector queue_info = { {1, 1, qm.queue_depth_, 0}, {1, 1, qm.queue_depth_, QUEUE_LONG_RUNNING}, + {qm.max_lanes_, qm.max_lanes_, qm.queue_depth_, QUEUE_LOW_LATENCY}, {qm.max_lanes_, qm.max_lanes_, qm.queue_depth_, 0} }; return HRUN_ADMIN->AsyncCreateTaskState( diff --git a/tasks/bdev/include/bdev/bdev_tasks.h b/tasks/bdev/include/bdev/bdev_tasks.h index 94cc2003a..3a68af6f7 100644 --- a/tasks/bdev/include/bdev/bdev_tasks.h +++ b/tasks/bdev/include/bdev/bdev_tasks.h @@ -184,7 +184,11 @@ struct WriteTask : 
public Task, TaskFlags { // Initialize task task_node_ = task_node; lane_hash_ = disk_off; - prio_ = TaskPrio::kLowLatency; + if (size < KILOBYTES(4)) { + prio_ = TaskPrio::kLowLatency; + } else { + prio_ = TaskPrio::kHighLatency; + } task_state_ = state_id; method_ = Method::kWrite; task_flags_.SetBits(TASK_UNORDERED | TASK_REMOTE_DEBUG_MARK); From bb10f2e834b38cd256a933fcf99722be9d2a9b96 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Fri, 10 Nov 2023 01:03:50 -0600 Subject: [PATCH 080/114] Make low latency < 8KB --- tasks/bdev/include/bdev/bdev_tasks.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/bdev/include/bdev/bdev_tasks.h b/tasks/bdev/include/bdev/bdev_tasks.h index 3a68af6f7..9ef4e7759 100644 --- a/tasks/bdev/include/bdev/bdev_tasks.h +++ b/tasks/bdev/include/bdev/bdev_tasks.h @@ -184,7 +184,7 @@ struct WriteTask : public Task, TaskFlags { // Initialize task task_node_ = task_node; lane_hash_ = disk_off; - if (size < KILOBYTES(4)) { + if (size < KILOBYTES(8)) { prio_ = TaskPrio::kLowLatency; } else { prio_ = TaskPrio::kHighLatency; From 7ed2f9f1b755a29379c012edb9b2cf789e5bc210 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Fri, 10 Nov 2023 01:04:42 -0600 Subject: [PATCH 081/114] Only reads are low latency --- tasks/bdev/include/bdev/bdev_tasks.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tasks/bdev/include/bdev/bdev_tasks.h b/tasks/bdev/include/bdev/bdev_tasks.h index 9ef4e7759..4333ba3ad 100644 --- a/tasks/bdev/include/bdev/bdev_tasks.h +++ b/tasks/bdev/include/bdev/bdev_tasks.h @@ -184,11 +184,7 @@ struct WriteTask : public Task, TaskFlags { // Initialize task task_node_ = task_node; lane_hash_ = disk_off; - if (size < KILOBYTES(8)) { - prio_ = TaskPrio::kLowLatency; - } else { - prio_ = TaskPrio::kHighLatency; - } + prio_ = TaskPrio::kHighLatency; task_state_ = state_id; method_ = Method::kWrite; task_flags_.SetBits(TASK_UNORDERED | TASK_REMOTE_DEBUG_MARK); @@ -233,7 +229,11 @@ struct ReadTask : public Task, TaskFlags { // Initialize task task_node_ = task_node; lane_hash_ = disk_off; - prio_ = TaskPrio::kLowLatency; + if (size < KILOBYTES(8)) { + prio_ = TaskPrio::kLowLatency; + } else { + prio_ = TaskPrio::kHighLatency; + } task_state_ = state_id; method_ = Method::kRead; task_flags_.SetBits(TASK_UNORDERED | TASK_REMOTE_DEBUG_MARK); From ed664f8e91d03aae1efdf4acfbd0d542a207bc25 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Fri, 10 Nov 2023 02:08:11 -0600 Subject: [PATCH 082/114] PutGetTest with flush --- benchmark/hermes_api_bench.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/benchmark/hermes_api_bench.cc b/benchmark/hermes_api_bench.cc index 19ba38889..77aef866b 100644 --- a/benchmark/hermes_api_bench.cc +++ b/benchmark/hermes_api_bench.cc @@ -78,6 +78,7 @@ void PutGetTest(int nprocs, int rank, int repeat, size_t blobs_per_rank, size_t blob_size) { PutTest(nprocs, rank, repeat, blobs_per_rank, blob_size); MPI_Barrier(MPI_COMM_WORLD); + HRUN_ADMIN->FlushRoot(DomainId::GetGlobal()); GetTest(nprocs, rank, repeat, blobs_per_rank, blob_size); } From c1d2253db69c9b7fe28676f320a4502c1bb6c3a6 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Fri, 10 Nov 2023 04:15:53 -0600 Subject: [PATCH 083/114] Add process affiner --- benchmark/test_latency.cc | 1 + hrun/include/hrun/task_registry/task.h | 9 ++-- .../include/hrun/work_orchestrator/affinity.h | 18 +++++++- .../work_orchestrator/work_orchestrator.h | 6 +++ hrun/include/hrun/work_orchestrator/worker.h | 42 +++++++++++-------- 
hrun/src/work_orchestrator.cc | 35 +++++++++++++--- .../src/worch_proc_round_robin.cc | 14 +++---- 7 files changed, 88 insertions(+), 37 deletions(-) diff --git a/benchmark/test_latency.cc b/benchmark/test_latency.cc index 852f15a4a..6b136c011 100644 --- a/benchmark/test_latency.cc +++ b/benchmark/test_latency.cc @@ -138,6 +138,7 @@ TEST_CASE("TestHshmQueueGetLane") { /** Single-thread performance of getting, emplacing, and popping a queue */ TEST_CASE("TestHshmQueueAllocateEmplacePop") { + TRANSPARENT_HERMES(); hrun::QueueId qid(0, 3); std::vector queue_info = { {16, 16, 256, 0} diff --git a/hrun/include/hrun/task_registry/task.h b/hrun/include/hrun/task_registry/task.h index 6c8fb1f43..d3ab5de8c 100644 --- a/hrun/include/hrun/task_registry/task.h +++ b/hrun/include/hrun/task_registry/task.h @@ -440,15 +440,14 @@ struct Task : public hipc::ShmContainer { /** Determine if time has elapsed */ HSHM_ALWAYS_INLINE bool ShouldRun(hshm::Timepoint &cur_time, bool flushing) { - if (!IsStarted() || flushing) { - start_ = cur_time; + if (!IsLongRunning()) { return true; } - if (IsLongRunning()) { - return start_.GetNsecFromStart(cur_time) >= period_ns_; - } else { + if (!IsStarted() || flushing) { + start_ = cur_time; return true; } + return start_.GetNsecFromStart(cur_time) >= period_ns_; } /** Mark this task as having been run */ diff --git a/hrun/include/hrun/work_orchestrator/affinity.h b/hrun/include/hrun/work_orchestrator/affinity.h index 0dbc32ab7..fcd578a09 100644 --- a/hrun/include/hrun/work_orchestrator/affinity.h +++ b/hrun/include/hrun/work_orchestrator/affinity.h @@ -39,6 +39,7 @@ class ProcessAffiner { private: int n_cpu_; cpu_set_t *cpus_; + std::vector ignore_pids_; public: ProcessAffiner() { @@ -69,10 +70,20 @@ class ProcessAffiner { } } + inline void SetCpus(const std::vector &cpu_ids) { + for (int cpu_id : cpu_ids) { + SetCpu(cpu_id); + } + } + inline void ClearCpu(int cpu) { CPU_CLR(cpu, cpus_); } + void IgnorePids(const std::vector &pids) { + ignore_pids_ = pids; + } + inline void ClearCpus(int off, int len) { for (int i = 0; i < len; ++i) { ClearCpu(off + i); @@ -98,10 +109,15 @@ class ProcessAffiner { // Iterate through all files and folders of /proc. while ((entry = readdir(procdir))) { // Skip anything that is not a PID folder. 
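      // PIDs listed in ignore_pids_ (see IgnorePids()) are skipped below and
      // keep their existing affinity.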
- if (!is_pid_folder(entry)) + if (!is_pid_folder(entry)) { continue; + } // Get the PID of the running process int proc_pid = atoi(entry->d_name); + if (std::find(ignore_pids_.begin(), ignore_pids_.end(), proc_pid) != + ignore_pids_.end()) { + continue; + } // Set the affinity of all running process to this mask count += Affine(proc_pid); } diff --git a/hrun/include/hrun/work_orchestrator/work_orchestrator.h b/hrun/include/hrun/work_orchestrator/work_orchestrator.h index 569c0293e..5dd8d17ac 100644 --- a/hrun/include/hrun/work_orchestrator/work_orchestrator.h +++ b/hrun/include/hrun/work_orchestrator/work_orchestrator.h @@ -51,6 +51,12 @@ class WorkOrchestrator { /** Get the number of workers */ size_t GetNumWorkers(); + /** Get all PIDs of active workers */ + std::vector GetWorkerPids(); + + /** Get the complement of worker cores */ + std::vector GetWorkerCoresComplement(); + /** Begin finalizing the runtime */ HSHM_ALWAYS_INLINE void FinalizeRuntime() { diff --git a/hrun/include/hrun/work_orchestrator/worker.h b/hrun/include/hrun/work_orchestrator/worker.h index 5aca2f575..94c6eee05 100644 --- a/hrun/include/hrun/work_orchestrator/worker.h +++ b/hrun/include/hrun/work_orchestrator/worker.h @@ -147,6 +147,7 @@ class Worker { ABT_thread tl_thread_; /**< The worker argobots thread handle */ int pthread_id_; /**< The worker pthread handle */ int pid_; /**< The worker process id */ + int affinity_; /**< The worker CPU affinity */ u32 numa_node_; // TODO(llogan): track NUMA affinity ABT_xstream xstream_; std::vector work_queue_; /**< The set of queues to poll */ @@ -161,6 +162,7 @@ class Worker { group_map_; /** Determine if a task can be executed right now */ hshm::charbuf group_; /** The current group */ WorkPending flush_; /** Info needed for flushing ops */ + hshm::Timepoint now_; /** The current timepoint */ public: /**=============================================================== @@ -168,13 +170,14 @@ class Worker { * =============================================================== */ /** Constructor */ - Worker(u32 id, ABT_xstream &xstream) { + Worker(u32 id, int cpu_id, ABT_xstream &xstream) { poll_queues_.Resize(1024); relinquish_queues_.Resize(1024); id_ = id; sleep_us_ = 0; retries_ = 1; pid_ = 0; + affinity_ = cpu_id; thread_ = std::make_unique(&Worker::Loop, this); pthread_id_ = thread_->native_handle(); // TODO(llogan): implement reserve for group @@ -268,8 +271,9 @@ class Worker { /** Set the CPU affinity of this worker */ void SetCpuAffinity(int cpu_id) { - ProcessAffiner::SetCpuAffinity(pid_, cpu_id); - HILOG(kInfo, "Affining worker {} to {}", id_, cpu_id); + HILOG(kInfo, "Affining worker {} (pid={}) to {}", id_, pid_, cpu_id); + affinity_ = cpu_id; + ProcessAffiner::SetCpuAffinity(pid_, affinity_); } /** Worker yields for a period of time */ @@ -291,17 +295,19 @@ class Worker { /** Worker loop iteration */ void Loop() { pid_ = GetLinuxTid(); + SetCpuAffinity(affinity_); WorkOrchestrator *orchestrator = HRUN_WORK_ORCHESTRATOR; + now_.Now(); while (orchestrator->IsAlive()) { - try { +// try { flush_.pending_ = 0; Run(); if (flush_.flushing_ && flush_.pending_ == 0) { flush_.flushing_ = false; } - } catch (hshm::Error &e) { - HELOG(kFatal, "(node {}) Worker {} caught an error: {}", HRUN_CLIENT->node_id_, id_, e.what()); - } +// } catch (hshm::Error &e) { +// HELOG(kFatal, "(node {}) Worker {} caught an error: {}", HRUN_CLIENT->node_id_, id_, e.what()); +// } if (!IsContinuousPolling()) { Yield(); } @@ -317,18 +323,18 @@ class Worker { if (relinquish_queues_.size() > 0) { 
_RelinquishQueues(); } - hshm::Timepoint now; - now.Now(); - for (WorkEntry &work_entry : work_queue_) { -// if (!work_entry.lane_->flags_.Any(QUEUE_LOW_LATENCY)) { -// work_entry.count_ += 1; -// if (work_entry.count_ % 4096 != 0) { -// continue; -// } -// } - work_entry.cur_time_ = now; - PollGrouped(work_entry); + if (!IsContinuousPolling()) { + now_.Now(); + for (WorkEntry &work_entry : work_queue_) { + work_entry.cur_time_ = now_; + PollGrouped(work_entry); + } + } else { + for (WorkEntry &work_entry : work_queue_) { + PollGrouped(work_entry); + } } + } /** Run an iteration over a particular queue */ diff --git a/hrun/src/work_orchestrator.cc b/hrun/src/work_orchestrator.cc index 49bdfee4c..ad3b023e0 100644 --- a/hrun/src/work_orchestrator.cc +++ b/hrun/src/work_orchestrator.cc @@ -33,27 +33,27 @@ void WorkOrchestrator::ServerInit(ServerConfig *config, QueueManager &qm) { int worker_id = 0; u32 last_cpu = config_->wo_.max_dworkers_; for (; worker_id < config_->wo_.max_dworkers_; ++worker_id) { - workers_.emplace_back(std::make_unique(worker_id, xstream_)); + int cpu_id = worker_id % HERMES_SYSTEM_INFO->ncpu_; + workers_.emplace_back(std::make_unique(worker_id, cpu_id, xstream_)); Worker &worker = *workers_.back(); - worker.SetCpuAffinity(worker_id % HERMES_SYSTEM_INFO->ncpu_); worker.EnableContinuousPolling(); dworkers_.emplace_back(&worker); } for (; worker_id < num_workers; ++worker_id) { - workers_.emplace_back(std::make_unique(worker_id, xstream_)); + int cpu_id = (int)(last_cpu + (worker_id - last_cpu) / config->wo_.owork_per_core_); + workers_.emplace_back(std::make_unique(worker_id, cpu_id, xstream_)); Worker &worker = *workers_.back(); - worker.SetCpuAffinity((int)(last_cpu + worker_id / config->wo_.owork_per_core_)); worker.DisableContinuousPolling(); oworkers_.emplace_back(&worker); } stop_runtime_ = false; kill_requested_ = false; - // Schedule admin queue on worker 0 + // Schedule admin queue on first overlapping worker MultiQueue *admin_queue = qm.GetQueue(qm.admin_queue_); LaneGroup *admin_group = &admin_queue->GetGroup(0); for (u32 lane_id = 0; lane_id < admin_group->num_lanes_; ++lane_id) { - Worker &worker = *workers_[0]; + Worker &worker = *oworkers_[0]; worker.PollQueues({WorkEntry(0, lane_id, admin_queue)}); } admin_group->num_scheduled_ = admin_group->num_lanes_; @@ -80,4 +80,27 @@ size_t WorkOrchestrator::GetNumWorkers() { return workers_.size(); } +/** Get all PIDs of active workers */ +std::vector WorkOrchestrator::GetWorkerPids() { + std::vector pids; + pids.reserve(workers_.size()); + for (std::unique_ptr &worker : workers_) { + pids.push_back(worker->pid_); + } + return pids; +} + +/** Get the complement of worker cores */ +std::vector WorkOrchestrator::GetWorkerCoresComplement() { + std::vector cores; + cores.reserve(HERMES_SYSTEM_INFO->ncpu_); + for (int i = 0; i < HERMES_SYSTEM_INFO->ncpu_; ++i) { + cores.push_back(i); + } + for (std::unique_ptr &worker : workers_) { + cores.erase(std::remove(cores.begin(), cores.end(), worker->affinity_), cores.end()); + } + return cores; +} + } // namespace hrun diff --git a/hrun/tasks_required/worch_proc_round_robin/src/worch_proc_round_robin.cc b/hrun/tasks_required/worch_proc_round_robin/src/worch_proc_round_robin.cc index 457c1a89e..58be5c011 100644 --- a/hrun/tasks_required/worch_proc_round_robin/src/worch_proc_round_robin.cc +++ b/hrun/tasks_required/worch_proc_round_robin/src/worch_proc_round_robin.cc @@ -34,13 +34,13 @@ class Server : public TaskLib { /** Schedule running processes */ void Schedule(ScheduleTask 
*task, RunContext &rctx) { -// int rr = 0; -// ProcessAffiner affiner; -// affiner.AffineAll(); -// for (std::unique_ptr &worker : HRUN_WORK_ORCHESTRATOR->workers_) { -// worker->SetCpuAffinity(rr % HERMES_SYSTEM_INFO->ncpu_); -// ++rr; -// } + ProcessAffiner affiner; + std::vector worker_pids = HRUN_WORK_ORCHESTRATOR->GetWorkerPids(); + std::vector cpu_ids = HRUN_WORK_ORCHESTRATOR->GetWorkerCoresComplement(); + HILOG(kInfo, "Affining {} processes to {} cores", worker_pids.size(), cpu_ids.size()); + affiner.IgnorePids(worker_pids); + affiner.SetCpus(cpu_ids); + affiner.AffineAll(); } void MonitorSchedule(u32 mode, ScheduleTask *task, RunContext &rctx) { } From 27a111d3ef49fb76736828293f5e4951966b9afc Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Fri, 10 Nov 2023 05:24:59 -0600 Subject: [PATCH 084/114] See if push queue still slow --- benchmark/test_latency.cc | 6 +++--- hrun/include/hrun/work_orchestrator/worker.h | 18 ++++++++++++++---- .../proc_queue/src/proc_queue.cc | 2 ++ 3 files changed, 19 insertions(+), 7 deletions(-) diff --git a/benchmark/test_latency.cc b/benchmark/test_latency.cc index 6b136c011..53267cc10 100644 --- a/benchmark/test_latency.cc +++ b/benchmark/test_latency.cc @@ -252,8 +252,7 @@ TEST_CASE("TestWorkerLatency") { /** Time to process a request */ TEST_CASE("TestRoundTripLatency") { - TRANSPARENT_HRUN(); - HERMES->ClientInit(); + TRANSPARENT_HERMES(); hrun::small_message::Client client; HRUN_ADMIN->RegisterTaskLibRoot(hrun::DomainId::GetLocal(), "small_message"); // int count = 25; @@ -271,7 +270,8 @@ TEST_CASE("TestRoundTripLatency") { size_t ops = (1 << 20); // size_t ops = 1024; for (size_t i = 0; i < ops; ++i) { - client.MdPushRoot(hrun::DomainId::GetLocal()); + client.MdRoot(hrun::DomainId::GetLocal()); + // client.MdPushRoot(hrun::DomainId::GetLocal()); } t.Pause(); diff --git a/hrun/include/hrun/work_orchestrator/worker.h b/hrun/include/hrun/work_orchestrator/worker.h index 94c6eee05..d65d38df0 100644 --- a/hrun/include/hrun/work_orchestrator/worker.h +++ b/hrun/include/hrun/work_orchestrator/worker.h @@ -276,6 +276,13 @@ class Worker { ProcessAffiner::SetCpuAffinity(pid_, affinity_); } + /** Make maximum priority process */ + void MakeDedicated() { + int policy = SCHED_FIFO; + struct sched_param param = { .sched_priority = 1 }; + sched_setscheduler(0, policy, ¶m); + } + /** Worker yields for a period of time */ void Yield() { if (flags_.Any(WORKER_CONTINUOUS_POLLING)) { @@ -296,18 +303,21 @@ class Worker { void Loop() { pid_ = GetLinuxTid(); SetCpuAffinity(affinity_); + if (IsContinuousPolling()) { + MakeDedicated(); + } WorkOrchestrator *orchestrator = HRUN_WORK_ORCHESTRATOR; now_.Now(); while (orchestrator->IsAlive()) { -// try { + try { flush_.pending_ = 0; Run(); if (flush_.flushing_ && flush_.pending_ == 0) { flush_.flushing_ = false; } -// } catch (hshm::Error &e) { -// HELOG(kFatal, "(node {}) Worker {} caught an error: {}", HRUN_CLIENT->node_id_, id_, e.what()); -// } + } catch (hshm::Error &e) { + HELOG(kFatal, "(node {}) Worker {} caught an error: {}", HRUN_CLIENT->node_id_, id_, e.what()); + } if (!IsContinuousPolling()) { Yield(); } diff --git a/hrun/tasks_required/proc_queue/src/proc_queue.cc b/hrun/tasks_required/proc_queue/src/proc_queue.cc index 9a4af3378..81f10462b 100644 --- a/hrun/tasks_required/proc_queue/src/proc_queue.cc +++ b/hrun/tasks_required/proc_queue/src/proc_queue.cc @@ -48,6 +48,8 @@ class Server : public TaskLib { task->is_fire_forget_ = true; } MultiQueue *real_queue = HRUN_CLIENT->GetQueue(QueueId(ptr->task_state_)); 
+ task->SetModuleComplete(); + return; real_queue->Emplace(ptr->prio_, ptr->lane_hash_, task->sub_run_.shm_); task->phase_ = PushTaskPhase::kWaitSchedule; } From 1a592d9cd5ffe496e8aad3171354dfe385d9c8c6 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Fri, 10 Nov 2023 05:25:21 -0600 Subject: [PATCH 085/114] Uncomment push queue changes --- hrun/tasks_required/proc_queue/src/proc_queue.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/hrun/tasks_required/proc_queue/src/proc_queue.cc b/hrun/tasks_required/proc_queue/src/proc_queue.cc index 81f10462b..9a4af3378 100644 --- a/hrun/tasks_required/proc_queue/src/proc_queue.cc +++ b/hrun/tasks_required/proc_queue/src/proc_queue.cc @@ -48,8 +48,6 @@ class Server : public TaskLib { task->is_fire_forget_ = true; } MultiQueue *real_queue = HRUN_CLIENT->GetQueue(QueueId(ptr->task_state_)); - task->SetModuleComplete(); - return; real_queue->Emplace(ptr->prio_, ptr->lane_hash_, task->sub_run_.shm_); task->phase_ = PushTaskPhase::kWaitSchedule; } From 5894a94a71ec3b50e892b788492df2c1e4923c90 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Fri, 10 Nov 2023 05:35:15 -0600 Subject: [PATCH 086/114] Count the processes --- .../worch_proc_round_robin/src/worch_proc_round_robin.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hrun/tasks_required/worch_proc_round_robin/src/worch_proc_round_robin.cc b/hrun/tasks_required/worch_proc_round_robin/src/worch_proc_round_robin.cc index 58be5c011..a5fc4701f 100644 --- a/hrun/tasks_required/worch_proc_round_robin/src/worch_proc_round_robin.cc +++ b/hrun/tasks_required/worch_proc_round_robin/src/worch_proc_round_robin.cc @@ -37,10 +37,10 @@ class Server : public TaskLib { ProcessAffiner affiner; std::vector worker_pids = HRUN_WORK_ORCHESTRATOR->GetWorkerPids(); std::vector cpu_ids = HRUN_WORK_ORCHESTRATOR->GetWorkerCoresComplement(); - HILOG(kInfo, "Affining {} processes to {} cores", worker_pids.size(), cpu_ids.size()); affiner.IgnorePids(worker_pids); affiner.SetCpus(cpu_ids); - affiner.AffineAll(); + int count = affiner.AffineAll(); + HILOG(kInfo, "Affining {} processes to {} cores", count, cpu_ids.size()); } void MonitorSchedule(u32 mode, ScheduleTask *task, RunContext &rctx) { } From 81f8c3c9978e50fb1a67d36296836438fbad3ad5 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Fri, 10 Nov 2023 05:47:59 -0600 Subject: [PATCH 087/114] Wait for worker threads to spawn and affine initially --- benchmark/test_latency.cc | 4 +-- .../work_orchestrator/work_orchestrator.h | 3 ++ hrun/include/hrun/work_orchestrator/worker.h | 2 +- hrun/src/work_orchestrator.cc | 29 +++++++++++++++++++ .../src/worch_proc_round_robin.cc | 8 +---- 5 files changed, 36 insertions(+), 10 deletions(-) diff --git a/benchmark/test_latency.cc b/benchmark/test_latency.cc index 53267cc10..a3042c317 100644 --- a/benchmark/test_latency.cc +++ b/benchmark/test_latency.cc @@ -263,8 +263,8 @@ TEST_CASE("TestRoundTripLatency") { client.CreateRoot(hrun::DomainId::GetLocal(), "ipc_test"); hshm::Timer t; - int pid = getpid(); - ProcessAffiner::SetCpuAffinity(pid, 8); + // int pid = getpid(); + // ProcessAffiner::SetCpuAffinity(pid, 8); t.Resume(); size_t ops = (1 << 20); diff --git a/hrun/include/hrun/work_orchestrator/work_orchestrator.h b/hrun/include/hrun/work_orchestrator/work_orchestrator.h index 5dd8d17ac..6b7012746 100644 --- a/hrun/include/hrun/work_orchestrator/work_orchestrator.h +++ b/hrun/include/hrun/work_orchestrator/work_orchestrator.h @@ -57,6 +57,9 @@ class WorkOrchestrator { /** Get the complement of worker 
cores */ std::vector GetWorkerCoresComplement(); + /** Begin dedicating core s*/ + void DedicateCores(); + /** Begin finalizing the runtime */ HSHM_ALWAYS_INLINE void FinalizeRuntime() { diff --git a/hrun/include/hrun/work_orchestrator/worker.h b/hrun/include/hrun/work_orchestrator/worker.h index d65d38df0..b3b840948 100644 --- a/hrun/include/hrun/work_orchestrator/worker.h +++ b/hrun/include/hrun/work_orchestrator/worker.h @@ -146,7 +146,7 @@ class Worker { std::unique_ptr thread_; /**< The worker thread handle */ ABT_thread tl_thread_; /**< The worker argobots thread handle */ int pthread_id_; /**< The worker pthread handle */ - int pid_; /**< The worker process id */ + std::atomic pid_; /**< The worker process id */ int affinity_; /**< The worker CPU affinity */ u32 numa_node_; // TODO(llogan): track NUMA affinity ABT_xstream xstream_; diff --git a/hrun/src/work_orchestrator.cc b/hrun/src/work_orchestrator.cc index ad3b023e0..c56ea3d7f 100644 --- a/hrun/src/work_orchestrator.cc +++ b/hrun/src/work_orchestrator.cc @@ -49,6 +49,21 @@ void WorkOrchestrator::ServerInit(ServerConfig *config, QueueManager &qm) { stop_runtime_ = false; kill_requested_ = false; + // Wait for pids to become non-zero + while (true) { + bool all_pids_nonzero = true; + for (std::unique_ptr &worker : workers_) { + if (worker->pid_ == 0) { + all_pids_nonzero = false; + break; + } + } + if (all_pids_nonzero) { + break; + } + std::this_thread::sleep_for(std::chrono::milliseconds(5)); + } + // Schedule admin queue on first overlapping worker MultiQueue *admin_queue = qm.GetQueue(qm.admin_queue_); LaneGroup *admin_group = &admin_queue->GetGroup(0); @@ -58,6 +73,9 @@ void WorkOrchestrator::ServerInit(ServerConfig *config, QueueManager &qm) { } admin_group->num_scheduled_ = admin_group->num_lanes_; + // Dedicate CPU cores to this runtime + DedicateCores(); + HILOG(kInfo, "Started {} workers", num_workers); } @@ -103,4 +121,15 @@ std::vector WorkOrchestrator::GetWorkerCoresComplement() { return cores; } +/** Dedicate cores */ +void WorkOrchestrator::DedicateCores() { + ProcessAffiner affiner; + std::vector worker_pids = GetWorkerPids(); + std::vector cpu_ids = GetWorkerCoresComplement(); + affiner.IgnorePids(worker_pids); + affiner.SetCpus(cpu_ids); + int count = affiner.AffineAll(); + HILOG(kInfo, "Affining {} processes to {} cores", count, cpu_ids.size()); +} + } // namespace hrun diff --git a/hrun/tasks_required/worch_proc_round_robin/src/worch_proc_round_robin.cc b/hrun/tasks_required/worch_proc_round_robin/src/worch_proc_round_robin.cc index a5fc4701f..44e8a9e5d 100644 --- a/hrun/tasks_required/worch_proc_round_robin/src/worch_proc_round_robin.cc +++ b/hrun/tasks_required/worch_proc_round_robin/src/worch_proc_round_robin.cc @@ -34,13 +34,7 @@ class Server : public TaskLib { /** Schedule running processes */ void Schedule(ScheduleTask *task, RunContext &rctx) { - ProcessAffiner affiner; - std::vector worker_pids = HRUN_WORK_ORCHESTRATOR->GetWorkerPids(); - std::vector cpu_ids = HRUN_WORK_ORCHESTRATOR->GetWorkerCoresComplement(); - affiner.IgnorePids(worker_pids); - affiner.SetCpus(cpu_ids); - int count = affiner.AffineAll(); - HILOG(kInfo, "Affining {} processes to {} cores", count, cpu_ids.size()); + HRUN_WORK_ORCHESTRATOR->DedicateCores(); } void MonitorSchedule(u32 mode, ScheduleTask *task, RunContext &rctx) { } From b1a9111c70ae237ed25b17e189a146a6f52f2cd7 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Fri, 10 Nov 2023 05:57:35 -0600 Subject: [PATCH 088/114] Make loop inlined --- 
hrun/include/hrun/work_orchestrator/worker.h | 1 + 1 file changed, 1 insertion(+) diff --git a/hrun/include/hrun/work_orchestrator/worker.h b/hrun/include/hrun/work_orchestrator/worker.h index b3b840948..c93bd56e5 100644 --- a/hrun/include/hrun/work_orchestrator/worker.h +++ b/hrun/include/hrun/work_orchestrator/worker.h @@ -348,6 +348,7 @@ class Worker { } /** Run an iteration over a particular queue */ + HSHM_ALWAYS_INLINE void PollGrouped(WorkEntry &work_entry) { int off = 0; Lane *&lane = work_entry.lane_; From db9ada7092b5d1bab4afc5f0282b105b40e9e3c0 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Fri, 10 Nov 2023 06:20:37 -0600 Subject: [PATCH 089/114] Dedicated admin worker --- .../hrun/work_orchestrator/work_orchestrator.h | 1 + hrun/src/work_orchestrator.cc | 17 ++++++++++------- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/hrun/include/hrun/work_orchestrator/work_orchestrator.h b/hrun/include/hrun/work_orchestrator/work_orchestrator.h index 6b7012746..71443b693 100644 --- a/hrun/include/hrun/work_orchestrator/work_orchestrator.h +++ b/hrun/include/hrun/work_orchestrator/work_orchestrator.h @@ -28,6 +28,7 @@ class WorkOrchestrator { std::vector> workers_; /**< Workers execute tasks */ std::vector dworkers_; /**< Core-dedicated workers */ std::vector oworkers_; /**< Undedicated workers */ + Worker* admin_worker_; /**< Constantly polled admin worker */ std::atomic stop_runtime_; /**< Begin killing the runtime */ std::atomic kill_requested_; /**< Kill flushing threads eventually */ ABT_xstream xstream_; diff --git a/hrun/src/work_orchestrator.cc b/hrun/src/work_orchestrator.cc index c56ea3d7f..25f734ba0 100644 --- a/hrun/src/work_orchestrator.cc +++ b/hrun/src/work_orchestrator.cc @@ -31,16 +31,20 @@ void WorkOrchestrator::ServerInit(ServerConfig *config, QueueManager &qm) { size_t num_workers = config_->wo_.max_dworkers_ + config->wo_.max_oworkers_; workers_.reserve(num_workers); int worker_id = 0; - u32 last_cpu = config_->wo_.max_dworkers_; - for (; worker_id < config_->wo_.max_dworkers_; ++worker_id) { - int cpu_id = worker_id % HERMES_SYSTEM_INFO->ncpu_; + u32 num_dworkers = config_->wo_.max_dworkers_ + 1; + for (; worker_id < num_dworkers; ++worker_id) { + int cpu_id = worker_id; workers_.emplace_back(std::make_unique(worker_id, cpu_id, xstream_)); Worker &worker = *workers_.back(); worker.EnableContinuousPolling(); - dworkers_.emplace_back(&worker); + if (worker_id > 0) { + dworkers_.emplace_back(&worker); + } else { + admin_worker_ = &worker; + } } for (; worker_id < num_workers; ++worker_id) { - int cpu_id = (int)(last_cpu + (worker_id - last_cpu) / config->wo_.owork_per_core_); + int cpu_id = (int)(num_dworkers + (worker_id - num_dworkers) / config->wo_.owork_per_core_); workers_.emplace_back(std::make_unique(worker_id, cpu_id, xstream_)); Worker &worker = *workers_.back(); worker.DisableContinuousPolling(); @@ -68,8 +72,7 @@ void WorkOrchestrator::ServerInit(ServerConfig *config, QueueManager &qm) { MultiQueue *admin_queue = qm.GetQueue(qm.admin_queue_); LaneGroup *admin_group = &admin_queue->GetGroup(0); for (u32 lane_id = 0; lane_id < admin_group->num_lanes_; ++lane_id) { - Worker &worker = *oworkers_[0]; - worker.PollQueues({WorkEntry(0, lane_id, admin_queue)}); + admin_worker_->PollQueues({WorkEntry(0, lane_id, admin_queue)}); } admin_group->num_scheduled_ = admin_group->num_lanes_; From e3fd9a4a83be590fee099d103b2582e402c2b0ce Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Fri, 10 Nov 2023 06:29:50 -0600 Subject: [PATCH 090/114] Use normal 
queue depth --- .../remote_queue/include/remote_queue/remote_queue.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hrun/tasks_required/remote_queue/include/remote_queue/remote_queue.h b/hrun/tasks_required/remote_queue/include/remote_queue/remote_queue.h index 52496e811..c9457a3d6 100644 --- a/hrun/tasks_required/remote_queue/include/remote_queue/remote_queue.h +++ b/hrun/tasks_required/remote_queue/include/remote_queue/remote_queue.h @@ -35,9 +35,9 @@ class Client : public TaskLibClient { // NOTE(llogan): 32x queue depth b/c default num rpc threads is 32 std::vector queue_info = { {1, 1, qm.queue_depth_, 0}, - {1, 1, 32 * qm.queue_depth_, QUEUE_LONG_RUNNING}, + {1, 1, qm.queue_depth_, QUEUE_LONG_RUNNING}, // {qm.max_lanes_, qm.max_lanes_, qm.queue_depth_, QUEUE_LOW_LATENCY} - {1, 1, 32 * qm.queue_depth_, QUEUE_LOW_LATENCY}, + {1, 1, qm.queue_depth_, QUEUE_LOW_LATENCY}, }; return HRUN_ADMIN->AsyncCreateTaskState( task_node, domain_id, state_name, id_, queue_info); From 8909d8087901cdf476e28306d7ca2da3e93936ca Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Fri, 10 Nov 2023 06:38:49 -0600 Subject: [PATCH 091/114] Ensure admin is truly dedicated --- hrun/src/work_orchestrator.cc | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/hrun/src/work_orchestrator.cc b/hrun/src/work_orchestrator.cc index 25f734ba0..f8abfd370 100644 --- a/hrun/src/work_orchestrator.cc +++ b/hrun/src/work_orchestrator.cc @@ -28,21 +28,23 @@ void WorkOrchestrator::ServerInit(ServerConfig *config, QueueManager &qm) { } // Spawn workers on the stream - size_t num_workers = config_->wo_.max_dworkers_ + config->wo_.max_oworkers_; + size_t num_workers = config_->wo_.max_dworkers_ + config->wo_.max_oworkers_ + 1; workers_.reserve(num_workers); int worker_id = 0; - u32 num_dworkers = config_->wo_.max_dworkers_ + 1; + // Spawn admin worker + workers_.emplace_back(std::make_unique(worker_id, 0, xstream_)); + admin_worker_ = workers_.back().get(); + ++worker_id; + // Spawn dedicated workers (dworkers) + u32 num_dworkers = config_->wo_.max_dworkers_; for (; worker_id < num_dworkers; ++worker_id) { int cpu_id = worker_id; workers_.emplace_back(std::make_unique(worker_id, cpu_id, xstream_)); Worker &worker = *workers_.back(); worker.EnableContinuousPolling(); - if (worker_id > 0) { - dworkers_.emplace_back(&worker); - } else { - admin_worker_ = &worker; - } + dworkers_.emplace_back(&worker); } + // Spawn overlapped workers (oworkers) for (; worker_id < num_workers; ++worker_id) { int cpu_id = (int)(num_dworkers + (worker_id - num_dworkers) / config->wo_.owork_per_core_); workers_.emplace_back(std::make_unique(worker_id, cpu_id, xstream_)); From cad9885562782fb1a0ad8d865e3106764ebcec5f Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Fri, 10 Nov 2023 06:40:45 -0600 Subject: [PATCH 092/114] don't let the dedicator run rampantly --- .../worch_proc_round_robin/src/worch_proc_round_robin.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hrun/tasks_required/worch_proc_round_robin/src/worch_proc_round_robin.cc b/hrun/tasks_required/worch_proc_round_robin/src/worch_proc_round_robin.cc index 44e8a9e5d..6c6db3f94 100644 --- a/hrun/tasks_required/worch_proc_round_robin/src/worch_proc_round_robin.cc +++ b/hrun/tasks_required/worch_proc_round_robin/src/worch_proc_round_robin.cc @@ -34,7 +34,7 @@ class Server : public TaskLib { /** Schedule running processes */ void Schedule(ScheduleTask *task, RunContext &rctx) { - HRUN_WORK_ORCHESTRATOR->DedicateCores(); + // 
HRUN_WORK_ORCHESTRATOR->DedicateCores(); } void MonitorSchedule(u32 mode, ScheduleTask *task, RunContext &rctx) { } From 5e74ada37f291c1bd9aacc8bfca63a22eb5b909b Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Fri, 10 Nov 2023 06:42:32 -0600 Subject: [PATCH 093/114] Better print --- .../worch_queue_round_robin/src/worch_queue_round_robin.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/hrun/tasks_required/worch_queue_round_robin/src/worch_queue_round_robin.cc b/hrun/tasks_required/worch_queue_round_robin/src/worch_queue_round_robin.cc index e94c8b85b..38471ffb8 100644 --- a/hrun/tasks_required/worch_queue_round_robin/src/worch_queue_round_robin.cc +++ b/hrun/tasks_required/worch_queue_round_robin/src/worch_queue_round_robin.cc @@ -52,13 +52,15 @@ class Server : public TaskLib { count_lowlat_ += 1; Worker &worker = *HRUN_WORK_ORCHESTRATOR->dworkers_[worker_off]; worker.PollQueues({WorkEntry(lane_group.prio_, lane_id, &queue)}); - HILOG(kDebug, "Scheduling the queue {} (lane {}, worker {})", queue.id_, lane_id, worker.id_); + HILOG(kDebug, "(node {}) Scheduling the queue {} (lane {}, worker {})", + HRUN_CLIENT->node_id_, queue.id_, lane_id, worker.id_); } else { u32 worker_off = count_highlat_ % HRUN_WORK_ORCHESTRATOR->oworkers_.size(); count_highlat_ += 1; Worker &worker = *HRUN_WORK_ORCHESTRATOR->oworkers_[worker_off]; worker.PollQueues({WorkEntry(lane_group.prio_, lane_id, &queue)}); - HILOG(kDebug, "Scheduling the queue {} (lane {}, worker {})", queue.id_, lane_id, worker_off); + HILOG(kDebug, "(node {}) Scheduling the queue {} (lane {}, worker {})", + HRUN_CLIENT->node_id_, queue.id_, lane_id, worker_off); } } lane_group.num_scheduled_ = lane_group.num_lanes_; From a685e553e9f52f01ba39fe3003f2d51e2a947780 Mon Sep 17 00:00:00 2001 From: Luke Logan Date: Sun, 12 Nov 2023 09:41:14 -0700 Subject: [PATCH 094/114] Add start and end flushing --- benchmark/hermes_api_bench.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/benchmark/hermes_api_bench.cc b/benchmark/hermes_api_bench.cc index 77aef866b..f2ca8e805 100644 --- a/benchmark/hermes_api_bench.cc +++ b/benchmark/hermes_api_bench.cc @@ -78,7 +78,9 @@ void PutGetTest(int nprocs, int rank, int repeat, size_t blobs_per_rank, size_t blob_size) { PutTest(nprocs, rank, repeat, blobs_per_rank, blob_size); MPI_Barrier(MPI_COMM_WORLD); + HILOG(kInfo, "Beginning flushing") HRUN_ADMIN->FlushRoot(DomainId::GetGlobal()); + HILOG(kInfo, "Finished flushing") GetTest(nprocs, rank, repeat, blobs_per_rank, blob_size); } From 75b00dc3615196665ff3bbf1d526ff5120692264 Mon Sep 17 00:00:00 2001 From: Luke Logan Date: Sun, 12 Nov 2023 09:58:43 -0700 Subject: [PATCH 095/114] also print barrier --- benchmark/hermes_api_bench.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/benchmark/hermes_api_bench.cc b/benchmark/hermes_api_bench.cc index f2ca8e805..4622ad2af 100644 --- a/benchmark/hermes_api_bench.cc +++ b/benchmark/hermes_api_bench.cc @@ -77,6 +77,7 @@ void GetTest(int nprocs, int rank, void PutGetTest(int nprocs, int rank, int repeat, size_t blobs_per_rank, size_t blob_size) { PutTest(nprocs, rank, repeat, blobs_per_rank, blob_size); + HILOG(kInfo, "Beginning barrier") MPI_Barrier(MPI_COMM_WORLD); HILOG(kInfo, "Beginning flushing") HRUN_ADMIN->FlushRoot(DomainId::GetGlobal()); From cacdee84040419b97450852270d9f002f6d437c9 Mon Sep 17 00:00:00 2001 From: Luke Logan Date: Sun, 12 Nov 2023 18:41:04 -0700 Subject: [PATCH 096/114] Why flush no work/ --- hrun/include/hrun/work_orchestrator/worker.h | 5 +++++ 1 file changed, 5 
insertions(+) diff --git a/hrun/include/hrun/work_orchestrator/worker.h b/hrun/include/hrun/work_orchestrator/worker.h index c93bd56e5..cf542c5a3 100644 --- a/hrun/include/hrun/work_orchestrator/worker.h +++ b/hrun/include/hrun/work_orchestrator/worker.h @@ -437,11 +437,16 @@ class Worker { } // Verify tasks if (flush_.flushing_ && !task->IsModuleComplete() && !task->IsFlush()) { + int pend_prior = flush_.pending_; if (task->IsLongRunning()) { exec->Monitor(MonitorMode::kFlushStat, task, rctx); } else { flush_.pending_ += 1; } + if (pend_prior == flush_.pending_) { + HILOG(kInfo, "(node {}) Pending on task={} state={} method={}", + HRUN_CLIENT->node_id_, task->task_node_, task->task_state_, task->method_) + } } // Cleanup on task completion if (task->IsModuleComplete()) { From ff0ffd05401d06fe94f2055cc0437fb7e5aae3bf Mon Sep 17 00:00:00 2001 From: Luke Logan Date: Sun, 12 Nov 2023 18:43:37 -0700 Subject: [PATCH 097/114] Why flush no work/ --- hrun/include/hrun/work_orchestrator/worker.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hrun/include/hrun/work_orchestrator/worker.h b/hrun/include/hrun/work_orchestrator/worker.h index cf542c5a3..5e46415ec 100644 --- a/hrun/include/hrun/work_orchestrator/worker.h +++ b/hrun/include/hrun/work_orchestrator/worker.h @@ -443,7 +443,7 @@ class Worker { } else { flush_.pending_ += 1; } - if (pend_prior == flush_.pending_) { + if (pend_prior != flush_.pending_) { HILOG(kInfo, "(node {}) Pending on task={} state={} method={}", HRUN_CLIENT->node_id_, task->task_node_, task->task_state_, task->method_) } From 674ddb569451206fa35d888f38e9a1793a9861e4 Mon Sep 17 00:00:00 2001 From: Luke Logan Date: Sun, 12 Nov 2023 18:48:17 -0700 Subject: [PATCH 098/114] Why flush no work/ --- hrun/include/hrun/work_orchestrator/worker.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/hrun/include/hrun/work_orchestrator/worker.h b/hrun/include/hrun/work_orchestrator/worker.h index 5e46415ec..293f4660e 100644 --- a/hrun/include/hrun/work_orchestrator/worker.h +++ b/hrun/include/hrun/work_orchestrator/worker.h @@ -444,8 +444,9 @@ class Worker { flush_.pending_ += 1; } if (pend_prior != flush_.pending_) { - HILOG(kInfo, "(node {}) Pending on task={} state={} method={}", - HRUN_CLIENT->node_id_, task->task_node_, task->task_state_, task->method_) + HILOG(kInfo, "(node {}) Pending on task={} state={} method={} is_remote={}", + HRUN_CLIENT->node_id_, task->task_node_, task->task_state_, task->method_, + is_remote) } } // Cleanup on task completion From 92add1c6a57a6c3b35b62c572f9b15c26d193eea Mon Sep 17 00:00:00 2001 From: Luke Logan Date: Sun, 12 Nov 2023 19:02:55 -0700 Subject: [PATCH 099/114] Add worker --- hrun/include/hrun/work_orchestrator/worker.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hrun/include/hrun/work_orchestrator/worker.h b/hrun/include/hrun/work_orchestrator/worker.h index 293f4660e..66c037d4e 100644 --- a/hrun/include/hrun/work_orchestrator/worker.h +++ b/hrun/include/hrun/work_orchestrator/worker.h @@ -444,7 +444,7 @@ class Worker { flush_.pending_ += 1; } if (pend_prior != flush_.pending_) { - HILOG(kInfo, "(node {}) Pending on task={} state={} method={} is_remote={}", + HILOG(kInfo, "(node {}) Pending on task={} state={} method={} is_remote={} worker={}", HRUN_CLIENT->node_id_, task->task_node_, task->task_state_, task->method_, is_remote) } From 8b12327f3116b8a877789912404b3e38203ad95b Mon Sep 17 00:00:00 2001 From: Luke Logan Date: Sun, 12 Nov 2023 20:05:22 -0700 Subject: [PATCH 100/114] Add 
worker --- hrun/include/hrun/work_orchestrator/worker.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hrun/include/hrun/work_orchestrator/worker.h b/hrun/include/hrun/work_orchestrator/worker.h index 66c037d4e..a7a018ccd 100644 --- a/hrun/include/hrun/work_orchestrator/worker.h +++ b/hrun/include/hrun/work_orchestrator/worker.h @@ -446,7 +446,7 @@ class Worker { if (pend_prior != flush_.pending_) { HILOG(kInfo, "(node {}) Pending on task={} state={} method={} is_remote={} worker={}", HRUN_CLIENT->node_id_, task->task_node_, task->task_state_, task->method_, - is_remote) + is_remote, id_) } } // Cleanup on task completion From 3aa2f4455f88dac297a937c60a25d23c3e1a2e58 Mon Sep 17 00:00:00 2001 From: Luke Logan Date: Mon, 13 Nov 2023 07:24:54 -0700 Subject: [PATCH 101/114] Test bucket data placement --- test/unit/hermes/test_bucket.cc | 47 +++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/test/unit/hermes/test_bucket.cc b/test/unit/hermes/test_bucket.cc index aae827175..8e4d449c5 100644 --- a/test/unit/hermes/test_bucket.cc +++ b/test/unit/hermes/test_bucket.cc @@ -654,3 +654,50 @@ TEST_CASE("TestHermesCollectMetadata") { REQUIRE(table.target_info_.size() >= 4); MPI_Barrier(MPI_COMM_WORLD); } + +TEST_CASE("TestHermesDataPlacement") { + int rank, nprocs; + MPI_Barrier(MPI_COMM_WORLD); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &nprocs); + + // Initialize Hermes on all nodes + HERMES->ClientInit(); + + // Create a bucket + hermes::Context ctx; + hermes::Bucket bkt("hello"); + + size_t count_per_proc = 16; + size_t off = rank * count_per_proc; + size_t proc_count = off + count_per_proc; + + // Put a few blobs in the bucket + for (size_t i = off; i < proc_count; ++i) { + HILOG(kInfo, "Iteration: {}", i); + hermes::Blob blob(KILOBYTES(4)); + memset(blob.data(), i % 256, blob.size()); + bkt.AsyncPut(std::to_string(i), blob, ctx); + } + sleep(5); + + // Demote half of blobs + for (size_t i = off; i < proc_count - count_per_proc / 2; ++i) { + HILOG(kInfo, "Iteration: {}", i); + hermes::Blob blob(KILOBYTES(4)); + memset(blob.data(), i % 256, blob.size()); + hermes::BlobId blob_id = bkt.GetBlobId(std::to_string(i)); + bkt.ReorganizeBlob(blob_id, .5, 0, ctx); + } + sleep(5); + + // Promote half of blobs + for (size_t i = off; i < proc_count - count_per_proc / 2; ++i) { + HILOG(kInfo, "Iteration: {}", i); + hermes::Blob blob(KILOBYTES(4)); + memset(blob.data(), i % 256, blob.size()); + hermes::BlobId blob_id = bkt.GetBlobId(std::to_string(i)); + bkt.ReorganizeBlob(blob_id, 1, 0, ctx); + } + sleep(5); +} From accc27c73457ea8648487aba2db6c4bbf03d383c Mon Sep 17 00:00:00 2001 From: Luke Logan Date: Mon, 13 Nov 2023 10:49:17 -0700 Subject: [PATCH 102/114] 256MB of RAM per proc --- test/unit/hermes/test_bucket.cc | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/test/unit/hermes/test_bucket.cc b/test/unit/hermes/test_bucket.cc index 8e4d449c5..7f6597a22 100644 --- a/test/unit/hermes/test_bucket.cc +++ b/test/unit/hermes/test_bucket.cc @@ -668,14 +668,14 @@ TEST_CASE("TestHermesDataPlacement") { hermes::Context ctx; hermes::Bucket bkt("hello"); - size_t count_per_proc = 16; + size_t count_per_proc = 256; size_t off = rank * count_per_proc; size_t proc_count = off + count_per_proc; // Put a few blobs in the bucket for (size_t i = off; i < proc_count; ++i) { HILOG(kInfo, "Iteration: {}", i); - hermes::Blob blob(KILOBYTES(4)); + hermes::Blob blob(MEGABYTES(1)); memset(blob.data(), i % 256, blob.size()); 
bkt.AsyncPut(std::to_string(i), blob, ctx); } @@ -684,8 +684,6 @@ TEST_CASE("TestHermesDataPlacement") { // Demote half of blobs for (size_t i = off; i < proc_count - count_per_proc / 2; ++i) { HILOG(kInfo, "Iteration: {}", i); - hermes::Blob blob(KILOBYTES(4)); - memset(blob.data(), i % 256, blob.size()); hermes::BlobId blob_id = bkt.GetBlobId(std::to_string(i)); bkt.ReorganizeBlob(blob_id, .5, 0, ctx); } @@ -694,8 +692,6 @@ TEST_CASE("TestHermesDataPlacement") { // Promote half of blobs for (size_t i = off; i < proc_count - count_per_proc / 2; ++i) { HILOG(kInfo, "Iteration: {}", i); - hermes::Blob blob(KILOBYTES(4)); - memset(blob.data(), i % 256, blob.size()); hermes::BlobId blob_id = bkt.GetBlobId(std::to_string(i)); bkt.ReorganizeBlob(blob_id, 1, 0, ctx); } From 86ca0854d1a6a0661d072848143520305aa34d59 Mon Sep 17 00:00:00 2001 From: Luke Logan Date: Mon, 13 Nov 2023 10:57:07 -0700 Subject: [PATCH 103/114] Promote everything --- test/unit/hermes/test_bucket.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/unit/hermes/test_bucket.cc b/test/unit/hermes/test_bucket.cc index 7f6597a22..fc5b2a7f8 100644 --- a/test/unit/hermes/test_bucket.cc +++ b/test/unit/hermes/test_bucket.cc @@ -668,7 +668,7 @@ TEST_CASE("TestHermesDataPlacement") { hermes::Context ctx; hermes::Bucket bkt("hello"); - size_t count_per_proc = 256; + size_t count_per_proc = 16; size_t off = rank * count_per_proc; size_t proc_count = off + count_per_proc; @@ -682,7 +682,7 @@ TEST_CASE("TestHermesDataPlacement") { sleep(5); // Demote half of blobs - for (size_t i = off; i < proc_count - count_per_proc / 2; ++i) { + for (size_t i = off; i < proc_count - count_per_proc; ++i) { HILOG(kInfo, "Iteration: {}", i); hermes::BlobId blob_id = bkt.GetBlobId(std::to_string(i)); bkt.ReorganizeBlob(blob_id, .5, 0, ctx); @@ -690,7 +690,7 @@ TEST_CASE("TestHermesDataPlacement") { sleep(5); // Promote half of blobs - for (size_t i = off; i < proc_count - count_per_proc / 2; ++i) { + for (size_t i = off; i < proc_count - count_per_proc; ++i) { HILOG(kInfo, "Iteration: {}", i); hermes::BlobId blob_id = bkt.GetBlobId(std::to_string(i)); bkt.ReorganizeBlob(blob_id, 1, 0, ctx); From facee6d3f4d8f1eedce91d1d5728d10ff63bb81e Mon Sep 17 00:00:00 2001 From: Luke Logan Date: Mon, 13 Nov 2023 11:02:52 -0700 Subject: [PATCH 104/114] Add prints --- test/unit/hermes/test_bucket.cc | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/test/unit/hermes/test_bucket.cc b/test/unit/hermes/test_bucket.cc index fc5b2a7f8..7eb45e0e8 100644 --- a/test/unit/hermes/test_bucket.cc +++ b/test/unit/hermes/test_bucket.cc @@ -673,27 +673,34 @@ TEST_CASE("TestHermesDataPlacement") { size_t proc_count = off + count_per_proc; // Put a few blobs in the bucket + HILOG(kInfo, "Initially placing blobs") for (size_t i = off; i < proc_count; ++i) { HILOG(kInfo, "Iteration: {}", i); hermes::Blob blob(MEGABYTES(1)); memset(blob.data(), i % 256, blob.size()); bkt.AsyncPut(std::to_string(i), blob, ctx); } + MPI_Barrier(MPI_COMM_WORLD); sleep(5); + // Demote half of blobs + HILOG(kInfo, "Demoting blobs") for (size_t i = off; i < proc_count - count_per_proc; ++i) { HILOG(kInfo, "Iteration: {}", i); hermes::BlobId blob_id = bkt.GetBlobId(std::to_string(i)); bkt.ReorganizeBlob(blob_id, .5, 0, ctx); } + MPI_Barrier(MPI_COMM_WORLD); sleep(5); // Promote half of blobs + HILOG(kInfo, "Promoting blobs") for (size_t i = off; i < proc_count - count_per_proc; ++i) { HILOG(kInfo, "Iteration: {}", i); hermes::BlobId blob_id = bkt.GetBlobId(std::to_string(i)); 
bkt.ReorganizeBlob(blob_id, 1, 0, ctx); } + MPI_Barrier(MPI_COMM_WORLD); sleep(5); } From be5f0fc2b8bcfeed22f53074391e5fd11f08018f Mon Sep 17 00:00:00 2001 From: Luke Logan Date: Mon, 13 Nov 2023 17:08:13 -0700 Subject: [PATCH 105/114] Demote and promote all blobs --- test/unit/hermes/test_bucket.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/unit/hermes/test_bucket.cc b/test/unit/hermes/test_bucket.cc index 7eb45e0e8..deb5ad163 100644 --- a/test/unit/hermes/test_bucket.cc +++ b/test/unit/hermes/test_bucket.cc @@ -686,7 +686,7 @@ TEST_CASE("TestHermesDataPlacement") { // Demote half of blobs HILOG(kInfo, "Demoting blobs") - for (size_t i = off; i < proc_count - count_per_proc; ++i) { + for (size_t i = off; i < proc_count; ++i) { HILOG(kInfo, "Iteration: {}", i); hermes::BlobId blob_id = bkt.GetBlobId(std::to_string(i)); bkt.ReorganizeBlob(blob_id, .5, 0, ctx); @@ -696,7 +696,7 @@ TEST_CASE("TestHermesDataPlacement") { // Promote half of blobs HILOG(kInfo, "Promoting blobs") - for (size_t i = off; i < proc_count - count_per_proc; ++i) { + for (size_t i = off; i < proc_count; ++i) { HILOG(kInfo, "Iteration: {}", i); hermes::BlobId blob_id = bkt.GetBlobId(std::to_string(i)); bkt.ReorganizeBlob(blob_id, 1, 0, ctx); From c37cd0c55c8d1db1bc81e964e3765eca432246de Mon Sep 17 00:00:00 2001 From: Luke Logan Date: Mon, 13 Nov 2023 17:56:07 -0700 Subject: [PATCH 106/114] Use longer sleep --- test/unit/hermes/test_bucket.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/unit/hermes/test_bucket.cc b/test/unit/hermes/test_bucket.cc index deb5ad163..90261f8a0 100644 --- a/test/unit/hermes/test_bucket.cc +++ b/test/unit/hermes/test_bucket.cc @@ -681,7 +681,7 @@ TEST_CASE("TestHermesDataPlacement") { bkt.AsyncPut(std::to_string(i), blob, ctx); } MPI_Barrier(MPI_COMM_WORLD); - sleep(5); + sleep(20); // Demote half of blobs @@ -692,7 +692,7 @@ TEST_CASE("TestHermesDataPlacement") { bkt.ReorganizeBlob(blob_id, .5, 0, ctx); } MPI_Barrier(MPI_COMM_WORLD); - sleep(5); + sleep(20); // Promote half of blobs HILOG(kInfo, "Promoting blobs") @@ -702,5 +702,5 @@ TEST_CASE("TestHermesDataPlacement") { bkt.ReorganizeBlob(blob_id, 1, 0, ctx); } MPI_Barrier(MPI_COMM_WORLD); - sleep(5); + sleep(20); } From c5b355738eb85163c4246a988388c8fba73a0d69 Mon Sep 17 00:00:00 2001 From: Luke Logan Date: Mon, 13 Nov 2023 18:00:56 -0700 Subject: [PATCH 107/114] Default score 1 --- tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc index 16156a21a..e0db13f29 100644 --- a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc +++ b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc @@ -618,7 +618,7 @@ class Server : public TaskLib { blob_info.tag_id_ = tag_id; blob_info.blob_size_ = 0; blob_info.max_blob_size_ = 0; - blob_info.score_ = 0; + blob_info.score_ = 1; blob_info.mod_count_ = 0; blob_info.access_freq_ = 0; blob_info.last_flush_ = 0; From e105fe1ca553717c3f5ea25dbea9837c6d02ed8b Mon Sep 17 00:00:00 2001 From: Luke Logan Date: Mon, 13 Nov 2023 18:07:55 -0700 Subject: [PATCH 108/114] Don't ask if should reorganize --- tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc index e0db13f29..b16307abd 100644 --- a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc +++ 
b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc @@ -845,10 +845,10 @@ class Server : public TaskLib { } else { blob_info.score_ = task->score_; } - if (!ShouldReorganize(blob_info, task->score_, task->task_node_)) { - task->SetModuleComplete(); - return; - } +// if (!ShouldReorganize(blob_info, task->score_, task->task_node_)) { +// task->SetModuleComplete(); +// return; +// } task->data_ = HRUN_CLIENT->AllocateBufferServer( blob_info.blob_size_, task).shm_; task->data_size_ = blob_info.blob_size_; From 5a848596e1f8a5bb090e08d381a3d93aa4c51100 Mon Sep 17 00:00:00 2001 From: Luke Logan Date: Mon, 13 Nov 2023 19:16:58 -0700 Subject: [PATCH 109/114] Infinite program --- test/unit/hermes/test_bucket.cc | 45 +++++++++++++++++---------------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/test/unit/hermes/test_bucket.cc b/test/unit/hermes/test_bucket.cc index 90261f8a0..8fa4b563e 100644 --- a/test/unit/hermes/test_bucket.cc +++ b/test/unit/hermes/test_bucket.cc @@ -674,33 +674,34 @@ TEST_CASE("TestHermesDataPlacement") { // Put a few blobs in the bucket HILOG(kInfo, "Initially placing blobs") - for (size_t i = off; i < proc_count; ++i) { - HILOG(kInfo, "Iteration: {}", i); - hermes::Blob blob(MEGABYTES(1)); - memset(blob.data(), i % 256, blob.size()); - bkt.AsyncPut(std::to_string(i), blob, ctx); - } - MPI_Barrier(MPI_COMM_WORLD); - sleep(20); - - - // Demote half of blobs - HILOG(kInfo, "Demoting blobs") for (size_t i = off; i < proc_count; ++i) { HILOG(kInfo, "Iteration: {}", i); - hermes::BlobId blob_id = bkt.GetBlobId(std::to_string(i)); - bkt.ReorganizeBlob(blob_id, .5, 0, ctx); + hermes::Blob blob(MEGABYTES(1)); + memset(blob.data(), i % 256, blob.size()); + bkt.AsyncPut(std::to_string(i), blob, ctx); } MPI_Barrier(MPI_COMM_WORLD); sleep(20); - // Promote half of blobs - HILOG(kInfo, "Promoting blobs") - for (size_t i = off; i < proc_count; ++i) { - HILOG(kInfo, "Iteration: {}", i); - hermes::BlobId blob_id = bkt.GetBlobId(std::to_string(i)); - bkt.ReorganizeBlob(blob_id, 1, 0, ctx); + while (true) { + // Demote half of blobs + HILOG(kInfo, "Demoting blobs") + for (size_t i = off; i < proc_count; ++i) { + HILOG(kInfo, "Iteration: {}", i); + hermes::BlobId blob_id = bkt.GetBlobId(std::to_string(i)); + bkt.ReorganizeBlob(blob_id, .5, 0, ctx); + } + MPI_Barrier(MPI_COMM_WORLD); + sleep(20); + + // Promote half of blobs + HILOG(kInfo, "Promoting blobs") + for (size_t i = off; i < proc_count; ++i) { + HILOG(kInfo, "Iteration: {}", i); + hermes::BlobId blob_id = bkt.GetBlobId(std::to_string(i)); + bkt.ReorganizeBlob(blob_id, 1, 0, ctx); + } + MPI_Barrier(MPI_COMM_WORLD); + sleep(20); } - MPI_Barrier(MPI_COMM_WORLD); - sleep(20); } From d7f9cebca85acb600a59684b26403adba59a24c7 Mon Sep 17 00:00:00 2001 From: Luke Logan Date: Mon, 13 Nov 2023 20:31:16 -0700 Subject: [PATCH 110/114] Add new demo test --- test/unit/hermes/test_bucket.cc | 48 +++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/test/unit/hermes/test_bucket.cc b/test/unit/hermes/test_bucket.cc index 8fa4b563e..9e87f4fb9 100644 --- a/test/unit/hermes/test_bucket.cc +++ b/test/unit/hermes/test_bucket.cc @@ -705,3 +705,51 @@ TEST_CASE("TestHermesDataPlacement") { sleep(20); } } + +TEST_CASE("TestHermesDataPlacementFancy") { + int rank, nprocs; + MPI_Barrier(MPI_COMM_WORLD); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &nprocs); + + // Initialize Hermes on all nodes + HERMES->ClientInit(); + + // Create a bucket + hermes::Context ctx; + hermes::Bucket bkt("hello"); + + 
size_t count_per_proc = 16; + size_t off = rank * count_per_proc; + size_t proc_count = off + count_per_proc; + + // Put a few blobs in the bucket + HILOG(kInfo, "Initially placing blobs") + for (size_t i = off; i < proc_count; ++i) { + HILOG(kInfo, "Iteration: {}", i); + hermes::Blob blob(MEGABYTES(1)); + memset(blob.data(), i % 256, blob.size()); + bkt.AsyncPut(std::to_string(i), blob, ctx); + } + MPI_Barrier(MPI_COMM_WORLD); + sleep(20); + + std::vector scores = { + 1.0, .5, .01, 0 + }; + int count = 0; + + while (true) { + int score_id = count % scores.size(); + // Demote half of blobs + HILOG(kInfo, "Demoting blobs") + for (size_t i = off; i < proc_count; ++i) { + HILOG(kInfo, "Iteration: {}", i); + hermes::BlobId blob_id = bkt.GetBlobId(std::to_string(i)); + bkt.ReorganizeBlob(blob_id, scores[score_id], 0, ctx); + } + MPI_Barrier(MPI_COMM_WORLD); + sleep(20); + count += 1; + } +} From 9dc5a2afe0c42c5ddb94fc03814db225e6c6b86f Mon Sep 17 00:00:00 2001 From: Luke Logan Date: Tue, 14 Nov 2023 11:38:46 -0700 Subject: [PATCH 111/114] Use vector instead of string for GetName --- include/hermes/hermes_types.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/include/hermes/hermes_types.h b/include/hermes/hermes_types.h index 49ad4eb75..c92adb6db 100644 --- a/include/hermes/hermes_types.h +++ b/include/hermes/hermes_types.h @@ -325,8 +325,10 @@ struct BlobInfo { } /** Get name as std::string */ - std::string GetName() { - return name_.str(); + std::vector GetName() { + std::vector data(name_.size()); + memcpy(data.data(), name_.data(), name_.size()); + return data; } }; @@ -348,8 +350,10 @@ struct TagInfo { } /** Get std::string of name */ - std::string GetName() { - return name_.str(); + std::vector GetName() { + std::vector data(name_.size()); + memcpy(data.data(), name_.data(), name_.size()); + return data; } }; From 2fae08d1a7d0e8d2b573be9ab8f8572647637014 Mon Sep 17 00:00:00 2001 From: Luke Logan Date: Tue, 14 Nov 2023 17:19:48 -0600 Subject: [PATCH 112/114] Sleep 5 --- test/unit/hermes/test_bucket.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/unit/hermes/test_bucket.cc b/test/unit/hermes/test_bucket.cc index 9e87f4fb9..f7a228e59 100644 --- a/test/unit/hermes/test_bucket.cc +++ b/test/unit/hermes/test_bucket.cc @@ -749,7 +749,7 @@ TEST_CASE("TestHermesDataPlacementFancy") { bkt.ReorganizeBlob(blob_id, scores[score_id], 0, ctx); } MPI_Barrier(MPI_COMM_WORLD); - sleep(20); + sleep(5); count += 1; } } From f8646610fdb3382135d96eeab039855cacab8ebe Mon Sep 17 00:00:00 2001 From: Luke Logan Date: Wed, 15 Nov 2023 10:21:42 -0700 Subject: [PATCH 113/114] Add print statements to the coeus quantity --- test/unit/hermes/test_bucket.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test/unit/hermes/test_bucket.cc b/test/unit/hermes/test_bucket.cc index f7a228e59..4c44332b9 100644 --- a/test/unit/hermes/test_bucket.cc +++ b/test/unit/hermes/test_bucket.cc @@ -587,6 +587,8 @@ TEST_CASE("TestHermesDataOp") { size_t proc_count = off + count_per_proc; size_t page_size = KILOBYTES(4); + HILOG(kInfo, "GENERATING VALUES BETWEEN 5 and 261"); + // Put a few blobs in the bucket for (size_t i = off; i < proc_count; ++i) { HILOG(kInfo, "Iteration: {}", i); @@ -622,6 +624,8 @@ TEST_CASE("TestHermesDataOp") { float min = *(float *)blob2.data(); REQUIRE(size == sizeof(float) * count_per_proc * nprocs); REQUIRE(min == 5); + + HILOG(kInfo, "MINIMUM VALUE QUERY FROM EMPRESS: {}", min); } TEST_CASE("TestHermesCollectMetadata") { From 
7020424fa222af95fe23e54dfc72f60a959990a0 Mon Sep 17 00:00:00 2001 From: Luke Logan Date: Wed, 15 Nov 2023 10:24:22 -0700 Subject: [PATCH 114/114] Use MB sizes --- test/unit/hermes/test_bucket.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/unit/hermes/test_bucket.cc b/test/unit/hermes/test_bucket.cc index 4c44332b9..ec8d09dd9 100644 --- a/test/unit/hermes/test_bucket.cc +++ b/test/unit/hermes/test_bucket.cc @@ -582,10 +582,10 @@ TEST_CASE("TestHermesDataOp") { op_graph.ops_.emplace_back(op); HERMES->RegisterOp(op_graph); - size_t count_per_proc = 16; + size_t count_per_proc = 256; size_t off = rank * count_per_proc; size_t proc_count = off + count_per_proc; - size_t page_size = KILOBYTES(4); + size_t page_size = MEGABYTES(1); HILOG(kInfo, "GENERATING VALUES BETWEEN 5 and 261");
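
The series above settles on one simple I/O priority rule (patches 080 and 081): writes always take the high-latency queue, and only reads smaller than 8 KB stay on the low-latency path. Below is a minimal, self-contained sketch of that rule for reference; TaskPrio, KILOBYTES, and ChoosePrio here are local stand-ins written for illustration and are not the actual hrun definitions from tasks/bdev/include/bdev/bdev_tasks.h.

#include <cstddef>
#include <cstdio>

// Illustration-only stand-ins for the hrun names used in bdev_tasks.h.
enum class TaskPrio { kLowLatency, kHighLatency };
constexpr std::size_t KILOBYTES(std::size_t n) { return n * 1024; }

// Mirrors the rule from patches 080/081: writes are always high latency;
// only reads below 8 KB are treated as latency sensitive.
TaskPrio ChoosePrio(bool is_read, std::size_t size) {
  if (is_read && size < KILOBYTES(8)) {
    return TaskPrio::kLowLatency;
  }
  return TaskPrio::kHighLatency;
}

int main() {
  std::printf("4 KB read : %s\n",
              ChoosePrio(true, KILOBYTES(4)) == TaskPrio::kLowLatency
                  ? "low latency" : "high latency");
  std::printf("4 KB write: %s\n",
              ChoosePrio(false, KILOBYTES(4)) == TaskPrio::kLowLatency
                  ? "low latency" : "high latency");
  return 0;
}

In the patches themselves this logic lives in the WriteTask and ReadTask constructors, where prio_ is set from the requested size before the task is enqueued.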