Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Trigger manual failover on SIGTERM / shutdown to cluster primary #1091

Open
wants to merge 18 commits into
base: unstable
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 35 additions & 1 deletion src/cluster_legacy.c
Original file line number Diff line number Diff line change
Expand Up @@ -1192,6 +1192,35 @@ void clusterInitLast(void) {

/* Called when a cluster node receives SHUTDOWN. */
void clusterHandleServerShutdown(void) {
if (server.auto_failover_on_shutdown) {
/* Find the first best replica, that is, the replica with the largest offset. */
client *best_replica = NULL;
listIter replicas_iter;
listNode *replicas_list_node;
listRewind(server.replicas, &replicas_iter);
while ((replicas_list_node = listNext(&replicas_iter)) != NULL) {
client *replica = listNodeValue(replicas_list_node);
/* This is done only when the replica offset is caught up, to avoid data loss */
if (replica->repl_state == REPLICA_STATE_ONLINE && replica->repl_ack_off == server.primary_repl_offset) {
best_replica = replica;
break;
}
}

if (best_replica) {
/* Send a CLUSTER FAILOVER FORCE to the best replica. */
const char *buf = "*3\r\n$7\r\nCLUSTER\r\n$8\r\nFAILOVER\r\n$5\r\nFORCE\r\n";
if (connWrite(best_replica->conn, buf, strlen(buf)) == (int)strlen(buf)) {
enjoy-binbin marked this conversation as resolved.
Show resolved Hide resolved
serverLog(LL_NOTICE, "Sending CLUSTER FAILOVER FORCE to replica %s succeeded.",
replicationGetReplicaName(best_replica));
} else {
serverLog(LL_WARNING, "Failed to send CLUSTER FAILOVER FORCE to replica: %s", strerror(errno));
}
} else {
serverLog(LL_NOTICE, "Unable to find a replica to perform an auto failover on shutdown.");
}
}

/* The error logs have been logged in the save function if the save fails. */
serverLog(LL_NOTICE, "Saving the cluster configuration file before exiting.");
clusterSaveConfig(1);
Expand Down Expand Up @@ -4588,6 +4617,7 @@ void clusterHandleReplicaFailover(void) {
if (server.cluster->mf_end) {
server.cluster->failover_auth_time = mstime();
server.cluster->failover_auth_rank = 0;
server.cluster->failover_auth_count++;
clusterDoBeforeSleep(CLUSTER_TODO_HANDLE_FAILOVER);
}
serverLog(LL_NOTICE,
Expand Down Expand Up @@ -6726,7 +6756,11 @@ int clusterCommandSpecial(client *c) {
/* If this is a forced failover, we don't need to talk with our
* primary to agree about the offset. We just failover taking over
* it without coordination. */
serverLog(LL_NOTICE, "Forced failover user request accepted (user request from '%s').", client);
if (c == server.primary) {
serverLog(LL_NOTICE, "Forced failover primary request accepted (primary request from '%s').", client);
} else {
serverLog(LL_NOTICE, "Forced failover user request accepted (user request from '%s').", client);
}
server.cluster->mf_can_start = 1;
/* We can start a manual failover as soon as possible, setting a flag
* here so that we don't need to waiting for the cron to kick in. */
Expand Down
1 change: 1 addition & 0 deletions src/config.c
Original file line number Diff line number Diff line change
Expand Up @@ -3136,6 +3136,7 @@ standardConfig static_configs[] = {
createBoolConfig("enable-debug-assert", NULL, IMMUTABLE_CONFIG | HIDDEN_CONFIG, server.enable_debug_assert, 0, NULL, NULL),
createBoolConfig("cluster-slot-stats-enabled", NULL, MODIFIABLE_CONFIG, server.cluster_slot_stats_enabled, 0, NULL, NULL),
createBoolConfig("hide-user-data-from-log", NULL, MODIFIABLE_CONFIG, server.hide_user_data_from_log, 1, NULL, NULL),
createBoolConfig("auto-failover-on-shutdown", NULL, MODIFIABLE_CONFIG, server.auto_failover_on_shutdown, 0, NULL, NULL),

/* String Configs */
createStringConfig("aclfile", NULL, IMMUTABLE_CONFIG, ALLOW_EMPTY_STRING, server.acl_filename, "", NULL, NULL),
Expand Down
1 change: 1 addition & 0 deletions src/server.h
Original file line number Diff line number Diff line change
Expand Up @@ -2176,6 +2176,7 @@ struct valkeyServer {
unsigned long cluster_blacklist_ttl; /* Duration in seconds that a node is denied re-entry into
* the cluster after it is forgotten with CLUSTER FORGET. */
int cluster_slot_stats_enabled; /* Cluster slot usage statistics tracking enabled. */
int auto_failover_on_shutdown; /* Trigger manual failover on shutdown to primary. */
/* Debug config that goes along with cluster_drop_packet_filter. When set, the link is closed on packet drop. */
uint32_t debug_cluster_close_link_on_packet_drop : 1;
sds cached_cluster_slot_info[CACHE_CONN_TYPE_MAX]; /* Index in array is a bitwise or of CACHE_CONN_TYPE_* */
Expand Down
25 changes: 25 additions & 0 deletions tests/support/util.tcl
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,31 @@ proc wait_replica_online r {
}
}

# Return 1 if the replica at replica_ip:replica_port has acknowledged the
# primary's current replication offset, 0 otherwise.
#
# The check is based on the primary's INFO REPLICATION output: the offset=
# field found on the replica's "slaveN:" line is compared against
# master_repl_offset. If no line matches the given ip/port, 0 is returned.
proc check_replica_acked_ofs {primary replica_ip replica_port} {
    set infostr [$primary info replication]
    set master_repl_offset [getInfoProperty $infostr master_repl_offset]

    # Escape the dots so the address is matched literally instead of each
    # '.' acting as a regex wildcard.
    set ip_re [string map {. {\.}} $replica_ip]

    # -line (instead of -lineanchor alone) also stops '.' from matching a
    # newline. Without it the greedy ".*" could run past the end of this
    # replica's line and capture the offset= of a replica listed later.
    # "\\d+" accepts replica indexes with more than one digit as well.
    set pattern "^slave\\d+:ip=$ip_re,port=$replica_port,.*,offset=(\\d+).*\r\n"
    if {[regexp -line $pattern $infostr _ offset]} {
        return [expr {$master_repl_offset == $offset}]
    }
    return 0
}

# Wait until the replica at replica_ip:replica_port has acknowledged the
# primary's current replication offset (as reported by
# check_replica_acked_ofs), polling up to 100 times with a 100 ms delay.
# Fails the test, after printing the primary's INFO REPLICATION, if that
# does not happen in time.
#
# While waiting, repl-ping-replica-period is set to 3600 on the primary and
# hz is set to 500 on the replica.
# NOTE(review): presumably this keeps periodic pings from moving the offset
# and makes the replica send its ACKs sooner -- confirm.
proc wait_replica_acked_ofs {primary replica replica_ip replica_port} {
    # Remember the current values so they can be put back afterwards. A
    # caller may have tuned them on purpose before calling this helper, so
    # they must not be overwritten with hard-coded values.
    set orig_ping_period [lindex [$primary config get repl-ping-replica-period] 1]
    set orig_hz [lindex [$replica config get hz] 1]

    $primary config set repl-ping-replica-period 3600
    $replica config set hz 500
    wait_for_condition 100 100 {
        [check_replica_acked_ofs $primary $replica_ip $replica_port] eq 1
    } else {
        puts "INFO REPLICATION: [$primary info replication]"
        fail "replica acked offset didn't match in time"
    }

    # Restore whatever was configured before the wait.
    $primary config set repl-ping-replica-period $orig_ping_period
    $replica config set hz $orig_hz
}

proc wait_for_ofs_sync {r1 r2} {
wait_for_condition 50 100 {
[status $r1 master_repl_offset] eq [status $r2 master_repl_offset]
Expand Down
93 changes: 93 additions & 0 deletions tests/unit/cluster/auto-failover-on-shutdown.tcl
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
# Shut down the server with index srv_id using the requested method.
#
#   srv_id - positive server index; it is passed as-is to R and negated for s.
#   how    - "shutdown" to send SHUTDOWN NOSAVE through the client, or
#            "sigterm" to deliver SIGTERM to the server process.
#
# Any other value of how raises an error rather than silently doing nothing,
# which would leave the calling test waiting for a shutdown that never happens.
proc shutdown_how {srv_id how} {
    switch -- $how {
        shutdown {
            # Errors from the command are ignored on purpose.
            # NOTE(review): presumably because the connection drops when the
            # server exits -- confirm.
            catch {R $srv_id shutdown nosave}
        }
        sigterm {
            exec kill -SIGTERM [s -$srv_id process_id]
        }
        default {
            error "shutdown_how: unknown method '$how', expected 'shutdown' or 'sigterm'"
        }
    }
}

# Start a cluster with 3 primaries and 4 replicas, so that the first primary has 2 replicas.
# Pause the first of those replicas, then shut down that primary and expect its second
# replica to become the new primary.
# Run the auto-failover-on-shutdown scenario against the cluster started by
# the caller.
#
#   how              - how the primary is stopped, forwarded to shutdown_how
#                      ("shutdown" or "sigterm").
#   shutdown_timeout - value applied to the primary's shutdown-timeout config.
#
# Two tests are defined:
#   1. With one replica of the primary paused, shutting the primary down must
#      make the other replica (srv -6) take over, and both sides must log the
#      forced failover.
#   2. When the node being shut down has no usable replica, it must log that
#      no replica could be found for the auto failover.
proc test_main {how shutdown_timeout} {
    test "auto-failover-on-shutdown will always pick a best replica and send CLUSTER FAILOVER - $how - shutdown-timeout: $shutdown_timeout" {
        set primary [srv 0 client]
        set replica1 [srv -3 client]
        set replica1_pid [s -3 process_id]
        set replica2 [srv -6 client]
        set replica2_ip [srv -6 host]
        set replica2_port [srv -6 port]

        $primary config set auto-failover-on-shutdown yes
        $primary config set shutdown-timeout $shutdown_timeout
        $primary config set repl-ping-replica-period 3600

        # Keep replica2 from starting a failover on its own.
        $replica2 config set cluster-replica-no-failover yes

        # Pause a replica so it has no chance to catch up with the offset.
        pause_process $replica1_pid

        # Have the primary write some data to increase the offset.
        for {set i 0} {$i < 10} {incr i} {
            $primary incr key_991803
        }

        if {$shutdown_timeout == 0} {
            # Wait for replica2 to catch up with the offset.
            wait_for_ofs_sync $primary $replica2
            wait_replica_acked_ofs $primary $replica2 $replica2_ip $replica2_port
        } else {
            # If shutdown-timeout is enabled, we expect the primary to pause
            # writes and wait for the replica to catch up with the offset.
        }

        # Shutdown the primary.
        shutdown_how 0 $how

        # Wait for replica2 to become a primary.
        wait_for_condition 1000 50 {
            [s -6 role] eq {master}
        } else {
            puts "s -6 role: [s -6 role]"
            fail "Failover did not happen"
        }

        # Make sure that the expected logs are printed.
        verify_log_message 0 "*Sending CLUSTER FAILOVER FORCE to replica*" 0
        verify_log_message -6 "*Forced failover primary request accepted*" 0

        resume_process $replica1_pid
    }

    test "Unable to find a replica to perform an auto failover - $how" {
        # srv -6 is the node promoted by the previous test.
        set primary [srv -6 client]
        set replica1 [srv -3 client]
        set replica1_pid [s -3 process_id]

        pause_process $replica1_pid

        $primary config set auto-failover-on-shutdown yes
        # Drop the replica connections so no replica can be selected.
        $primary client kill type replica
        shutdown_how 6 $how
        wait_for_log_messages -6 {"*Unable to find a replica to perform an auto failover on shutdown*"} 0 1000 10

        resume_process $replica1_pid
    }
}

# Run the scenario on a fresh cluster (3 primaries, 4 replicas) for every
# combination of shutdown-timeout and shutdown method, in this order:
# shutdown/0, sigterm/0, shutdown/10, sigterm/10.
foreach shutdown_timeout {0 10} {
    foreach how {shutdown sigterm} {
        # The script is built with [list] so the values are baked in and do
        # not depend on the scope in which start_cluster evaluates its body.
        start_cluster 3 4 {tags {external:skip cluster}} [list test_main $how $shutdown_timeout]
    }
}
4 changes: 4 additions & 0 deletions valkey.conf
Original file line number Diff line number Diff line change
Expand Up @@ -1601,6 +1601,10 @@ aof-timestamp-enabled no
# shutdown-on-sigint default
# shutdown-on-sigterm default

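# When this option is enabled and a cluster primary is shut down (by the
# SHUTDOWN command or by SIGTERM), the primary looks for an online replica
# whose acknowledged replication offset matches its own offset and sends it
# CLUSTER FAILOVER FORCE, so that the replica takes over as the new primary.
# If no such replica is found, the shutdown proceeds without triggering a
# failover.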
#
# auto-failover-on-shutdown no

################ NON-DETERMINISTIC LONG BLOCKING COMMANDS #####################

# Maximum time in milliseconds for EVAL scripts, functions and in some cases
Expand Down
Loading