Skip to content

Commit

Permalink
Merge pull request ceph#47681 from ronen-fr/wip-rf-delog1
Browse files Browse the repository at this point in the history
osd/scrub: improving scrub logs
Reviewed-by: Aishwarya Mathuria <[email protected]>
  • Loading branch information
ronen-fr authored Aug 28, 2022
2 parents 30078cd + 95dd59b commit 07bfcac
Show file tree
Hide file tree
Showing 8 changed files with 115 additions and 106 deletions.
29 changes: 22 additions & 7 deletions src/include/utime_fmt.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,29 +4,44 @@
/**
* \file fmtlib formatter for utime_t
*/
#include <fmt/format.h>
#include <fmt/chrono.h>

#include <string_view>
#include <fmt/format.h>

#include "include/utime.h"

template <>
struct fmt::formatter<utime_t> {
constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
template <typename ParseContext>
constexpr auto parse(ParseContext& ctx)
{
auto it = ctx.begin();
if (it != ctx.end() && *it == 's') {
short_format = true;
++it;
}
return it;
}

template <typename FormatContext>
auto format(const utime_t& utime, FormatContext& ctx)
{
if (utime.sec() < ((time_t)(60 * 60 * 24 * 365 * 10))) {
// raw seconds. this looks like a relative time.
return fmt::format_to(ctx.out(), "{}.{:06}", (long)utime.sec(),
utime.usec());
utime.usec());
}

// this looks like an absolute time.
// conform to http://en.wikipedia.org/wiki/ISO_8601
auto asgmt = fmt::gmtime(utime.sec());
return fmt::format_to(ctx.out(), "{:%FT%T}.{:06}{:%z}", asgmt, utime.usec(), asgmt);
// (unless short_format is set)
auto aslocal = fmt::localtime(utime.sec());
if (short_format) {
return fmt::format_to(ctx.out(), "{:%FT%T}.{:03}", aslocal,
utime.usec() / 1000);
}
return fmt::format_to(ctx.out(), "{:%FT%T}.{:06}{:%z}", aslocal,
utime.usec(), aslocal);
}

bool short_format{false};
};
11 changes: 4 additions & 7 deletions src/osd/PG.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1685,10 +1685,8 @@ std::optional<requested_scrub_t> PG::validate_scrub_mode() const
*/
void PG::on_info_history_change()
{
dout(20) << __func__ << " for a " << (is_primary() ? "Primary" : "non-primary") <<dendl;

ceph_assert(m_scrubber);
m_scrubber->on_maybe_registration_change(m_planned_scrub);
m_scrubber->on_primary_change(__func__, m_planned_scrub);
}

void PG::reschedule_scrub()
Expand All @@ -1705,10 +1703,9 @@ void PG::reschedule_scrub()
void PG::on_primary_status_change(bool was_primary, bool now_primary)
{
// make sure we have a working scrubber when becoming a primary

if (was_primary != now_primary) {
ceph_assert(m_scrubber);
m_scrubber->on_primary_change(m_planned_scrub);
m_scrubber->on_primary_change(__func__, m_planned_scrub);
}
}

Expand Down Expand Up @@ -1742,10 +1739,10 @@ void PG::on_new_interval()

assert(m_scrubber);
// log some scrub data before we react to the interval
dout(20) << __func__ << (is_scrub_queued_or_active() ? " scrubbing " : " ")
dout(30) << __func__ << (is_scrub_queued_or_active() ? " scrubbing " : " ")
<< "flags: " << m_planned_scrub << dendl;

m_scrubber->on_maybe_registration_change(m_planned_scrub);
m_scrubber->on_primary_change(__func__, m_planned_scrub);
}

epoch_t PG::oldest_stored_osdmap() {
Expand Down
1 change: 0 additions & 1 deletion src/osd/scrubber/PrimaryLogScrub.cc
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,6 @@ PrimaryLogScrub::PrimaryLogScrub(PrimaryLogPG* pg) : PgScrubber{pg}, m_pl_pg{pg}

void PrimaryLogScrub::_scrub_clear_state()
{
dout(15) << __func__ << dendl;
m_scrub_cstat = object_stat_collection_t();
}

Expand Down
76 changes: 36 additions & 40 deletions src/osd/scrubber/osd_scrub_sched.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ using namespace ::std::literals;
#define dout_context (cct)
#define dout_subsys ceph_subsys_osd
#undef dout_prefix
#define dout_prefix *_dout << "osd." << whoami << " "
#define dout_prefix *_dout << "osd." << whoami << " "

ScrubQueue::ScrubJob::ScrubJob(CephContext* cct, const spg_t& pg, int node_id)
: RefCountedObject{cct}
Expand Down Expand Up @@ -45,9 +45,8 @@ void ScrubQueue::ScrubJob::update_schedule(
// the (atomic) flag will only be cleared by select_pg_and_scrub() after
// scan_penalized() is called and the job was moved to the to_scrub queue.
updated = true;

dout(10) << " pg[" << pgid << "] adjusted: " << schedule.scheduled_at << " "
<< registration_state() << dendl;
dout(10) << fmt::format("{}: pg[{}] adjusted: {:s} ({})", __func__, pgid,
schedule.scheduled_at, registration_state()) << dendl;
}

std::string ScrubQueue::ScrubJob::scheduling_state(utime_t now_is,
Expand All @@ -65,7 +64,7 @@ std::string ScrubQueue::ScrubJob::scheduling_state(utime_t now_is,
return fmt::format("queued for {}scrub", (is_deep_expected ? "deep " : ""));
}

return fmt::format("{}scrub scheduled @ {}",
return fmt::format("{}scrub scheduled @ {:s}",
(is_deep_expected ? "deep " : ""),
schedule.scheduled_at);
}
Expand Down Expand Up @@ -151,13 +150,15 @@ void ScrubQueue::remove_from_osd_queue(ScrubJobRef scrub_job)
}
}

void ScrubQueue::register_with_osd(ScrubJobRef scrub_job,
const ScrubQueue::sched_params_t& suggested)
void ScrubQueue::register_with_osd(
ScrubJobRef scrub_job,
const ScrubQueue::sched_params_t& suggested)
{
qu_state_t state_at_entry = scrub_job->state.load();

dout(15) << "pg[" << scrub_job->pgid << "] was "
<< qu_state_text(state_at_entry) << dendl;
dout(20) << fmt::format(
"pg[{}] state at entry: <{:.14}>", scrub_job->pgid,
state_at_entry)
<< dendl;

switch (state_at_entry) {
case qu_state_t::registered:
Expand All @@ -172,7 +173,7 @@ void ScrubQueue::register_with_osd(ScrubJobRef scrub_job,

if (state_at_entry != scrub_job->state) {
lck.unlock();
dout(5) << " scrub job state changed" << dendl;
dout(5) << " scrub job state changed. Retrying." << dendl;
// retry
register_with_osd(scrub_job, suggested);
break;
Expand All @@ -183,7 +184,6 @@ void ScrubQueue::register_with_osd(ScrubJobRef scrub_job,
scrub_job->in_queues = true;
scrub_job->state = qu_state_t::registered;
}

break;

case qu_state_t::unregistering:
Expand All @@ -204,10 +204,11 @@ void ScrubQueue::register_with_osd(ScrubJobRef scrub_job,
break;
}

dout(10) << "pg(" << scrub_job->pgid << ") sched-state changed from "
<< qu_state_text(state_at_entry) << " to "
<< qu_state_text(scrub_job->state)
<< " at: " << scrub_job->schedule.scheduled_at << dendl;
dout(10) << fmt::format(
"pg[{}] sched-state changed from <{:.14}> to <{:.14}> (@{:s})",
scrub_job->pgid, state_at_entry, scrub_job->state.load(),
scrub_job->schedule.scheduled_at)
<< dendl;
}

// look mommy - no locks!
Expand All @@ -222,10 +223,9 @@ void ScrubQueue::update_job(ScrubJobRef scrub_job,
ScrubQueue::sched_params_t ScrubQueue::determine_scrub_time(
const requested_scrub_t& request_flags,
const pg_info_t& pg_info,
const pool_opts_t pool_conf) const
const pool_opts_t& pool_conf) const
{
ScrubQueue::sched_params_t res;
dout(15) << ": requested_scrub_t: {}" << request_flags << dendl;

if (request_flags.must_scrub || request_flags.need_auto) {

Expand All @@ -234,8 +234,7 @@ ScrubQueue::sched_params_t ScrubQueue::determine_scrub_time(
res.is_must = ScrubQueue::must_scrub_t::mandatory;
// we do not need the interval data in this case

} else if (pg_info.stats.stats_invalid &&
conf()->osd_scrub_invalid_stats) {
} else if (pg_info.stats.stats_invalid && conf()->osd_scrub_invalid_stats) {
res.proposed_time = time_now();
res.is_must = ScrubQueue::must_scrub_t::mandatory;

Expand All @@ -246,13 +245,12 @@ ScrubQueue::sched_params_t ScrubQueue::determine_scrub_time(
}

dout(15) << fmt::format(
": suggested: {} hist: {} v: {}/{} must: {} pool-min: {}",
res.proposed_time,
pg_info.history.last_scrub_stamp,
"suggested: {:s} hist: {:s} v:{}/{} must:{} pool-min:{} {}",
res.proposed_time, pg_info.history.last_scrub_stamp,
(bool)pg_info.stats.stats_invalid,
conf()->osd_scrub_invalid_stats,
(res.is_must == must_scrub_t::mandatory ? "y" : "n"),
res.min_interval)
res.min_interval, request_flags)
<< dendl;
return res;
}
Expand Down Expand Up @@ -522,21 +520,10 @@ Scrub::schedule_result_t ScrubQueue::select_from_group(
ScrubQueue::scrub_schedule_t ScrubQueue::adjust_target_time(
const sched_params_t& times) const
{
ScrubQueue::scrub_schedule_t sched_n_dead{times.proposed_time,
times.proposed_time};

if (g_conf()->subsys.should_gather<ceph_subsys_osd, 20>()) {
dout(20) << "min t: " << times.min_interval
<< " osd: " << conf()->osd_scrub_min_interval
<< " max t: " << times.max_interval
<< " osd: " << conf()->osd_scrub_max_interval << dendl;

dout(20) << "at " << sched_n_dead.scheduled_at << " ratio "
<< conf()->osd_scrub_interval_randomize_ratio << dendl;
}
ScrubQueue::scrub_schedule_t sched_n_dead{
times.proposed_time, times.proposed_time};

if (times.is_must == ScrubQueue::must_scrub_t::not_mandatory) {

// unless explicitly requested, postpone the scrub with a random delay
double scrub_min_interval = times.min_interval > 0
? times.min_interval
Expand All @@ -555,10 +542,19 @@ ScrubQueue::scrub_schedule_t ScrubQueue::adjust_target_time(
} else {
sched_n_dead.deadline += scrub_max_interval;
}
// note: no specific job can be named in the log message
dout(20) << fmt::format(
"not-must. Was:{:s} {{min:{}/{} max:{}/{} ratio:{}}} "
"Adjusted:{:s} ({:s})",
times.proposed_time, fmt::group_digits(times.min_interval),
fmt::group_digits(conf()->osd_scrub_min_interval),
fmt::group_digits(times.max_interval),
fmt::group_digits(conf()->osd_scrub_max_interval),
conf()->osd_scrub_interval_randomize_ratio,
sched_n_dead.scheduled_at, sched_n_dead.deadline)
<< dendl;
}

dout(17) << "at (final) " << sched_n_dead.scheduled_at << " - "
<< sched_n_dead.deadline << dendl;
// else - no log needed. All relevant data will be logged by the caller
return sched_n_dead;
}

Expand Down
34 changes: 22 additions & 12 deletions src/osd/scrubber/osd_scrub_sched.h
Original file line number Diff line number Diff line change
Expand Up @@ -271,8 +271,8 @@ class ScrubQueue {
*/
std::string_view registration_state() const
{
return in_queues.load(std::memory_order_relaxed) ? " in-queue"
: " not-queued";
return in_queues.load(std::memory_order_relaxed) ? "in-queue"
: "not-queued";
}

/**
Expand Down Expand Up @@ -363,7 +363,7 @@ class ScrubQueue {

sched_params_t determine_scrub_time(const requested_scrub_t& request_flags,
const pg_info_t& pg_info,
const pool_opts_t pool_conf) const;
const pool_opts_t& pool_conf) const;

public:
void dump_scrubs(ceph::Formatter* f) const;
Expand Down Expand Up @@ -421,7 +421,7 @@ class ScrubQueue {
* variables. Specifically, the following are guaranteed:
* - 'in_queues' is asserted only if the job is in one of the queues;
* - a job will only be in state 'registered' if in one of the queues;
* - no job will be in the two queues simulatenously
* - no job will be in the two queues simultaneously;
*
* Note that PG locks should not be acquired while holding jobs_lock.
*/
Expand Down Expand Up @@ -522,6 +522,19 @@ class ScrubQueue {
virtual utime_t time_now() const { return ceph_clock_now(); }
};

template <>
struct fmt::formatter<ScrubQueue::qu_state_t>
: fmt::formatter<std::string_view> {
template <typename FormatContext>
auto format(const ScrubQueue::qu_state_t& s, FormatContext& ctx)
{
auto out = ctx.out();
out = fmt::formatter<string_view>::format(
std::string{ScrubQueue::qu_state_text(s)}, ctx);
return out;
}
};

template <>
struct fmt::formatter<ScrubQueue::ScrubJob> {
constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
Expand All @@ -531,13 +544,10 @@ struct fmt::formatter<ScrubQueue::ScrubJob> {
{
return fmt::format_to(
ctx.out(),
"{}, {} dead: {} - {} / failure: {} / pen. t.o.: {} / queue state: {}",
sjob.pgid,
sjob.schedule.scheduled_at,
sjob.schedule.deadline,
sjob.registration_state(),
sjob.resources_failure,
sjob.penalty_timeout,
ScrubQueue::qu_state_text(sjob.state));
"pg[{}] @ {:s} (dl:{:s}) - <{}> / failure: {} / pen. t.o.: {:s} / queue "
"state: {:.7}",
sjob.pgid, sjob.schedule.scheduled_at, sjob.schedule.deadline,
sjob.registration_state(), sjob.resources_failure, sjob.penalty_timeout,
sjob.state.load(std::memory_order_relaxed));
}
};
Loading

0 comments on commit 07bfcac

Please sign in to comment.