Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

osd/OSD: allow new message type "MSG_OSD_MARK_ME_DOWN_AND_DEAD" sent by OSD #1

Open
wants to merge 3 commits into
base: wip-nitzan-fast-shutdown-notify-mon
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/common/options/global.yaml.in
Original file line number Diff line number Diff line change
Expand Up @@ -3271,7 +3271,7 @@ options:
desc: Tell mon about OSD shutdown on immediate shutdown
long_desc: Tell the monitor the OSD is shutting down on immediate shutdown. This
helps with cluster log messages from other OSDs reporting it immediately failed.
default: false
default: true
see_also:
- osd_fast_shutdown
- osd_mon_shutdown_timeout
Expand Down
13 changes: 12 additions & 1 deletion src/messages/MOSDMarkMeDown.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

class MOSDMarkMeDown final : public PaxosServiceMessage {
private:
static constexpr int HEAD_VERSION = 3;
static constexpr int HEAD_VERSION = 4;
static constexpr int COMPAT_VERSION = 3;

public:
Expand All @@ -28,6 +28,7 @@ class MOSDMarkMeDown final : public PaxosServiceMessage {
entity_addrvec_t target_addrs;
epoch_t epoch = 0;
bool request_ack = false; // ack requested
bool down_and_dead = false; // mark down and dead

MOSDMarkMeDown()
: PaxosServiceMessage{MSG_OSD_MARK_ME_DOWN, 0,
Expand All @@ -38,6 +39,12 @@ class MOSDMarkMeDown final : public PaxosServiceMessage {
HEAD_VERSION, COMPAT_VERSION},
fsid(fs), target_osd(osd), target_addrs(av),
epoch(e), request_ack(request_ack) {}
MOSDMarkMeDown(const uuid_d &fs, int osd, const entity_addrvec_t& av,
epoch_t e, bool request_ack, bool down_and_dead)
: PaxosServiceMessage{MSG_OSD_MARK_ME_DOWN_AND_DEAD, e,
HEAD_VERSION, COMPAT_VERSION},
fsid(fs), target_osd(osd), target_addrs(av),
epoch(e), request_ack(request_ack), down_and_dead(down_and_dead) {}
private:
~MOSDMarkMeDown() final {}

Expand All @@ -54,6 +61,8 @@ class MOSDMarkMeDown final : public PaxosServiceMessage {
decode(target_addrs, p);
decode(epoch, p);
decode(request_ack, p);
assert(header.version >= 4);
decode(down_and_dead, p);
}

void encode_payload(uint64_t features) override {
Expand All @@ -67,12 +76,14 @@ class MOSDMarkMeDown final : public PaxosServiceMessage {
encode(target_addrs, payload, features);
encode(epoch, payload);
encode(request_ack, payload);
encode(down_and_dead, payload);
}

std::string_view get_type_name() const override { return "MOSDMarkMeDown"; }
void print(std::ostream& out) const override {
out << "MOSDMarkMeDown("
<< "request_ack=" << request_ack
<< ", down_and_dead=" << down_and_dead
<< ", osd." << target_osd
<< ", " << target_addrs
<< ", fsid=" << fsid
Expand Down
1 change: 1 addition & 0 deletions src/mon/Monitor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4546,6 +4546,7 @@ void Monitor::dispatch_op(MonOpRequestRef op)
case MSG_OSD_BEACON:
case MSG_OSD_MARK_ME_DOWN:
case MSG_OSD_MARK_ME_DEAD:
case MSG_OSD_MARK_ME_DOWN_AND_DEAD:
case MSG_OSD_FULL:
case MSG_OSD_FAILURE:
case MSG_OSD_BOOT:
Expand Down
24 changes: 24 additions & 0 deletions src/mon/OSDMonitor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2734,6 +2734,7 @@ bool OSDMonitor::preprocess_query(MonOpRequestRef op)

// damp updates
case MSG_OSD_MARK_ME_DOWN:
case MSG_OSD_MARK_ME_DOWN_AND_DEAD:
return preprocess_mark_me_down(op);
case MSG_OSD_MARK_ME_DEAD:
return preprocess_mark_me_dead(op);
Expand Down Expand Up @@ -2779,6 +2780,8 @@ bool OSDMonitor::prepare_update(MonOpRequestRef op)
// damp updates
case MSG_OSD_MARK_ME_DOWN:
return prepare_mark_me_down(op);
case MSG_OSD_MARK_ME_DOWN_AND_DEAD:
return prepare_mark_me_down_and_dead(op);
case MSG_OSD_MARK_ME_DEAD:
return prepare_mark_me_dead(op);
case MSG_OSD_FULL:
Expand Down Expand Up @@ -3065,6 +3068,27 @@ bool OSDMonitor::prepare_mark_me_down(MonOpRequestRef op)
return true;
}

/**
 * Stage the pending map changes for an OSD that reported itself as
 * "down and dead" via an MOSDMarkMeDown request.
 *
 * Asserts that the target OSD is currently up in the osdmap and that its
 * registered addresses equal the ones carried by the request. It then records
 * a CEPH_OSD_UP entry in pending_inc.new_state and sets dead_epoch in the
 * OSD's pending xinfo to the request's epoch.
 *
 * @param op the monitor op wrapping the MOSDMarkMeDown request
 * @return always true
 */
bool OSDMonitor::prepare_mark_me_down_and_dead(MonOpRequestRef op)
{
  op->mark_osdmon_event(__func__);
  auto req = op->get_req<MOSDMarkMeDown>();
  int osd_id = req->target_osd;

  ceph_assert(osdmap.is_up(osd_id));
  ceph_assert(osdmap.get_addrs(osd_id) == req->target_addrs);

  mon.clog->info() << "osd." << osd_id << " marked itself down and dead as of e"
                   << req->get_epoch();

  // NOTE(review): presumably new_state entries are applied as a toggle mask,
  // so CEPH_OSD_UP here clears the up bit — confirm against OSDMap::Incremental.
  pending_inc.new_state[osd_id] = CEPH_OSD_UP;

  // Seed the pending xinfo from the current map only if this increment has
  // not already staged an entry for this OSD, so earlier pending edits survive.
  if (pending_inc.new_xinfo.find(osd_id) == pending_inc.new_xinfo.end()) {
    pending_inc.new_xinfo[osd_id] = osdmap.osd_xinfo[osd_id];
  }
  pending_inc.new_xinfo[osd_id].dead_epoch = req->get_epoch();

  // When the sender asked for an ack, register a C_AckMarkedDown callback
  // through wait_for_finished_proposal.
  if (req->request_ack) {
    wait_for_finished_proposal(op, new C_AckMarkedDown(this, op));
  }
  return true;
}

bool OSDMonitor::preprocess_mark_me_dead(MonOpRequestRef op)
{
op->mark_osdmon_event(__func__);
Expand Down
1 change: 1 addition & 0 deletions src/mon/OSDMonitor.h
Original file line number Diff line number Diff line change
Expand Up @@ -430,6 +430,7 @@ class OSDMonitor : public PaxosService,
bool preprocess_failure(MonOpRequestRef op);
bool prepare_failure(MonOpRequestRef op);
bool prepare_mark_me_down(MonOpRequestRef op);
bool prepare_mark_me_down_and_dead(MonOpRequestRef op);
void process_failures();
void take_all_failures(std::list<MonOpRequestRef>& ls);

Expand Down
1 change: 1 addition & 0 deletions src/msg/Message.cc
Original file line number Diff line number Diff line change
Expand Up @@ -499,6 +499,7 @@ Message *decode_message(CephContext *cct,
m = make_message<MOSDFailure>();
break;
case MSG_OSD_MARK_ME_DOWN:
case MSG_OSD_MARK_ME_DOWN_AND_DEAD:
m = make_message<MOSDMarkMeDown>();
break;
case MSG_OSD_MARK_ME_DEAD:
Expand Down
1 change: 1 addition & 0 deletions src/msg/Message.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@
#define MSG_OSD_MARK_ME_DOWN 74
#define MSG_OSD_FULL 75
#define MSG_OSD_MARK_ME_DEAD 123
#define MSG_OSD_MARK_ME_DOWN_AND_DEAD 124

// removed right after luminous
//#define MSG_OSD_SUBOP 76
Expand Down
31 changes: 23 additions & 8 deletions src/osd/OSD.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1299,20 +1299,35 @@ bool OSDService::prepare_to_stop()

OSDMapRef osdmap = get_osdmap();
if (osdmap && osdmap->is_up(whoami)) {
dout(0) << __func__ << " telling mon we are shutting down" << dendl;
set_state(PREPARING_TO_STOP);
monc->send_mon_message(
new MOSDMarkMeDown(
monc->get_fsid(),
whoami,
osdmap->get_addrs(whoami),
osdmap->get_epoch(),
true // request ack
if (cct->_conf->osd_fast_shutdown &&
cct->_conf->osd_fast_shutdown_notify_mon) {
dout(0) << __func__ << " telling mon we are shutting down and dead " << dendl;
monc->send_mon_message(
new MOSDMarkMeDown(
monc->get_fsid(),
whoami,
osdmap->get_addrs(whoami),
osdmap->get_epoch(),
true, // request ack
true // mark as down and dead
));
} else {
dout(0) << __func__ << " telling mon we are shutting down" << dendl;
monc->send_mon_message(
new MOSDMarkMeDown(
monc->get_fsid(),
whoami,
osdmap->get_addrs(whoami),
osdmap->get_epoch(),
true // request ack
));
}
const auto timeout = ceph::make_timespan(cct->_conf->osd_mon_shutdown_timeout);
is_stopping_cond.wait_for(l, timeout,
[this] { return get_state() == STOPPING; });
}

dout(0) << __func__ << " starting shutdown" << dendl;
set_state(STOPPING);
return true;
Expand Down
4 changes: 3 additions & 1 deletion src/vstart.sh
Original file line number Diff line number Diff line change
Expand Up @@ -794,7 +794,7 @@ $DAEMONOPTS
osd class dir = $OBJCLASS_PATH
osd class load list = *
osd class default list = *
osd fast shutdown = false
osd fast shutdown = true

filestore wbthrottle xfs ios start flusher = 10
filestore wbthrottle xfs ios hard limit = 20
Expand Down Expand Up @@ -1408,6 +1408,8 @@ osd_scrub_load_threshold = 2000
osd_debug_op_order = true
osd_debug_misdirected_ops = true
osd_copyfrom_max_chunk = 524288
osd fast shutdown = true
osd fast shutdown notify mon= true

[mds]
mds_debug_frag = true
Expand Down