diff --git a/common/place/placer_static.cc b/common/place/placer_static.cc index d3cb4863d5..a7a0004f95 100644 --- a/common/place/placer_static.cc +++ b/common/place/placer_static.cc @@ -40,6 +40,14 @@ #include "fftsg.h" +#ifndef NEXTPNR_DISABLE_THREADS +#include +#include +#include +#include + +#endif + NEXTPNR_NAMESPACE_BEGIN using namespace StaticUtil; @@ -140,6 +148,93 @@ struct PlacerNet int hpwl() { return (b1.x - b0.x) + (b1.y - b0.y); } }; +#ifdef NEXTPNR_DISABLE_THREADS +struct ThreadPool +{ + ThreadPool(int){}; + + void run(int N, std::function func) + { + for (int i = 0; i < N; i++) + func(i); + }; +}; +#else +struct ThreadPool +{ + ThreadPool(int thread_count) + { + done.resize(thread_count, false); + for (int i = 0; i < thread_count; i++) { + threads.emplace_back([this, i]() { this->worker(i); }); + } + } + std::vector threads; + std::condition_variable cv_start, cv_done; + std::mutex mutex; + + bool work_available = false; + bool shutdown = false; + std::vector done; + std::function work; + int work_count; + + ~ThreadPool() + { + { + std::lock_guard lk(mutex); + shutdown = true; + } + cv_start.notify_all(); + for (auto &t : threads) + t.join(); + } + + void run(int N, std::function func) + { + { + std::lock_guard lk(mutex); + work = func; + work_count = N; + work_available = true; + std::fill(done.begin(), done.end(), false); + } + cv_start.notify_all(); + { + std::unique_lock lk(mutex); + cv_done.wait(lk, [this] { return std::all_of(done.begin(), done.end(), [](bool x) { return x; }); }); + work_available = false; + } + } + + void worker(int idx) + { + while (true) { + std::unique_lock lk(mutex); + cv_start.wait(lk, [this, idx] { return (work_available && !done.at(idx)) || shutdown; }); + if (shutdown) { + lk.unlock(); + break; + } else if (work_available && !done.at(idx)) { + int work_per_thread = (work_count + int(threads.size()) - 1) / threads.size(); + int begin = work_per_thread * idx; + int end = std::min(work_count, work_per_thread * (idx + 1)); + lk.unlock(); + + for (int j = begin; j < end; j++) { + work(j); + } + + lk.lock(); + done.at(idx) = true; + lk.unlock(); + cv_done.notify_one(); + } + } + } +}; +#endif + class StaticPlacer { Context *ctx; @@ -154,6 +249,7 @@ class StaticPlacer FastBels fast_bels; TimingAnalyser tmg; + ThreadPool pool; int width, height; int iter = 0; @@ -626,12 +722,14 @@ class StaticPlacer RealPair wl_coeff{0.5f, 0.5f}; - void update_nets(Axis axis, bool ref) + void update_nets(bool ref) { static constexpr float min_wirelen_force = -300.f; - for (auto &net : nets) { + pool.run(2 * nets.size(), [&](int i) { + auto &net = nets.at(i / 2); + auto axis = (i % 2) ? Axis::Y : Axis::X; if (net.skip) - continue; + return; net.min_exp.at(axis) = 0; net.x_min_exp.at(axis) = 0; net.max_exp.at(axis) = 0; @@ -665,7 +763,7 @@ class StaticPlacer } net.wa_wl.at(axis) = (net.x_max_exp.at(axis) / net.max_exp.at(axis)) - (net.x_min_exp.at(axis) / net.min_exp.at(axis)); - } + }); } float wirelen_grad(CellInfo *cell, Axis axis, bool ref) @@ -709,13 +807,11 @@ class StaticPlacer void update_gradients(bool ref = true, bool set_prev = true, bool init_penalty = false) { // TODO: skip non-group cells more efficiently? - for (int group = 0; group < int(groups.size()); group++) { + pool.run(groups.size(), [&](int group) { compute_density(group, ref); run_fft(group); - } - for (auto axis : {Axis::X, Axis::Y}) { - update_nets(axis, ref); - } + }); + update_nets(ref); // First loop: back up gradients if required; set to zero; and compute density gradient for (auto &cell : mcells) { auto &g = groups.at(cell.group); @@ -966,9 +1062,7 @@ class StaticPlacer log_info("Strict legalising %d cells...\n", int(to_legalise.size())); float pre_hpwl = system_hpwl(); legalise_placement_strict(true); - for (auto axis : {Axis::X, Axis::Y}) { - update_nets(axis, true); - } + update_nets(true); float post_hpwl = system_hpwl(); log_info("HPWL after legalise: %f (delta: %f)\n", post_hpwl, post_hpwl - pre_hpwl); } @@ -1244,7 +1338,8 @@ class StaticPlacer } public: - StaticPlacer(Context *ctx, PlacerStaticCfg cfg) : ctx(ctx), cfg(cfg), fast_bels(ctx, true, 8), tmg(ctx) + StaticPlacer(Context *ctx, PlacerStaticCfg cfg) + : ctx(ctx), cfg(cfg), fast_bels(ctx, true, 8), tmg(ctx), pool(ctx->setting("threads", 8)) { groups.resize(cfg.cell_groups.size()); };