-
Notifications
You must be signed in to change notification settings - Fork 696
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
CRC removal during diskless full sync with TLS enabled. #1479
base: unstable
Are you sure you want to change the base?
Changes from 9 commits
67fa361
2e5314e
f33f78f
c605a83
dd180dd
6126177
5c34391
e481f73
14eeb6f
266cd65
be9bb4d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1245,11 +1245,13 @@ void syncCommand(client *c) { | |
* the primary can accurately list replicas and their listening ports in the | ||
* INFO output. | ||
* | ||
* - capa <eof|psync2|dual-channel> | ||
* - capa <eof|psync2|dual-channel|bypass-crc> | ||
* What are the capabilities of this instance. | ||
* eof: supports EOF-style RDB transfer for diskless replication. | ||
* psync2: supports PSYNC v2, so understands +CONTINUE <new repl ID>. | ||
* dual-channel: supports full sync using rdb channel. | ||
* bypass-crc: supports skipping CRC calculations during diskless sync using | ||
* a connection that has integrity checks (such as TLS). | ||
* | ||
* - ack <offset> [fack <aofofs>] | ||
* Replica informs the primary the amount of replication stream that it | ||
|
@@ -1315,7 +1317,8 @@ void replconfCommand(client *c) { | |
/* If dual-channel is disabled on this primary, treat this command as unrecognized | ||
* replconf option. */ | ||
c->replica_capa |= REPLICA_CAPA_DUAL_CHANNEL; | ||
} | ||
} else if (!strcasecmp(c->argv[j + 1]->ptr, REPLICA_CAPA_BYPASS_CRC_STR)) | ||
c->replica_capa |= REPLICA_CAPA_BYPASS_CRC; | ||
} else if (!strcasecmp(c->argv[j]->ptr, "ack")) { | ||
/* REPLCONF ACK is used by replica to inform the primary the amount | ||
* of replication stream that it processed so far. It is an | ||
|
@@ -1973,6 +1976,11 @@ static int useDisklessLoad(void) { | |
return enabled; | ||
} | ||
|
||
/* Returns 1 if the replica can skip CRC calculations during full sync */ | ||
int replicationSupportBypassCRC(connection *conn, int is_replica_diskless_load, int is_primary_diskless_sync) { | ||
return is_replica_diskless_load && is_primary_diskless_sync && connIsIntegrityChecked(conn); | ||
} | ||
|
||
/* Helper function for readSyncBulkPayload() to initialize tempDb | ||
* before socket-loading the new db from primary. The tempDb may be populated | ||
* by swapMainDbWithTempDb or freed by disklessLoadDiscardTempDb later. */ | ||
|
@@ -2252,7 +2260,14 @@ void readSyncBulkPayload(connection *conn) { | |
|
||
serverLog(LL_NOTICE, "PRIMARY <-> REPLICA sync: Loading DB in memory"); | ||
startLoading(server.repl_transfer_size, RDBFLAGS_REPLICATION, asyncLoading); | ||
|
||
if (replicationSupportBypassCRC(conn, use_diskless_load, usemark)) { | ||
/* We can bypass CRC checks when data is transmitted through a verified stream. | ||
* The usemark flag indicates that the primary is streaming the data directly without | ||
* writing it to storage. | ||
* Similarly, the use_diskless_load flag indicates that the | ||
* replica will load the payload directly into memory without first writing it to disk. */ | ||
rdb.flags |= RIO_FLAG_BYPASS_CRC; | ||
} | ||
int loadingFailed = 0; | ||
rdbLoadingCtx loadingCtx = {.dbarray = dbarray, .functions_lib_ctx = functions_lib_ctx}; | ||
if (rdbLoadRioWithLoadingCtxScopedRdb(&rdb, RDBFLAGS_REPLICATION, &rsi, &loadingCtx) != C_OK) { | ||
|
@@ -2494,6 +2509,7 @@ char *sendCommand(connection *conn, ...) { | |
while (1) { | ||
arg = va_arg(ap, char *); | ||
if (arg == NULL) break; | ||
if (strcmp(arg, "") == 0) continue; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. A few things:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. IMO using some const indicator is not the correct way. Can we just use SendCommandArgv? |
||
cmdargs = sdscatprintf(cmdargs, "$%zu\r\n%s\r\n", strlen(arg), arg); | ||
argslen++; | ||
} | ||
|
@@ -3511,11 +3527,19 @@ void syncWithPrimary(connection *conn) { | |
* | ||
* EOF: supports EOF-style RDB transfer for diskless replication. | ||
* PSYNC2: supports PSYNC v2, so understands +CONTINUE <new repl ID>. | ||
* BYPASS-CRC: supports skipping CRC calculations during full sync. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. BYPASS isn't the right word, since it implies we are doing something else instead. I think |
||
* Inform the primary of this capa only during diskless sync using a | ||
* connection that has integrity checks (such as TLS). | ||
* In disk-based sync, or non-integrity-checked connection, there is more | ||
* concern for data corruption so we keep this extra layer of detection. | ||
* | ||
* The primary will ignore capabilities it does not understand. */ | ||
int send_bypass_crc_capa = replicationSupportBypassCRC(conn, useDisklessLoad(), 1); | ||
err = sendCommand(conn, "REPLCONF", "capa", "eof", "capa", "psync2", | ||
server.dual_channel_replication ? "capa" : NULL, | ||
server.dual_channel_replication ? "dual-channel" : NULL, NULL); | ||
send_bypass_crc_capa ? "capa" : "", | ||
send_bypass_crc_capa ? REPLICA_CAPA_BYPASS_CRC_STR : "", | ||
server.dual_channel_replication ? "capa" : "", | ||
server.dual_channel_replication ? "dual-channel" : "", NULL); | ||
if (err) goto write_error; | ||
|
||
/* Inform the primary of our (replica) version. */ | ||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -425,6 +425,7 @@ void rioFreeFd(rio *r) { | |||||
/* This function can be installed both in memory and file streams when checksum | ||||||
* computation is needed. */ | ||||||
void rioGenericUpdateChecksum(rio *r, const void *buf, size_t len) { | ||||||
if ((r->flags & RIO_FLAG_BYPASS_CRC) != 0) return; // skip CRC64 calculations | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
r->cksum = crc64(r->cksum, buf, len); | ||||||
} | ||||||
|
||||||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -2639,6 +2639,7 @@ void resetServerStats(void) { | |||||
server.stat_fork_rate = 0; | ||||||
server.stat_total_forks = 0; | ||||||
server.stat_rejected_conn = 0; | ||||||
server.stat_total_sync_bypass_crc = 0; | ||||||
server.stat_sync_full = 0; | ||||||
server.stat_sync_partial_ok = 0; | ||||||
server.stat_sync_partial_err = 0; | ||||||
|
@@ -5879,6 +5880,7 @@ sds genValkeyInfoString(dict *section_dict, int all_sections, int everything) { | |||||
"instantaneous_input_repl_kbps:%.2f\r\n", (float)getInstantaneousMetric(STATS_METRIC_NET_INPUT_REPLICATION) / 1024, | ||||||
"instantaneous_output_repl_kbps:%.2f\r\n", (float)getInstantaneousMetric(STATS_METRIC_NET_OUTPUT_REPLICATION) / 1024, | ||||||
"rejected_connections:%lld\r\n", server.stat_rejected_conn, | ||||||
"total_sync_bypass_crc:%ld\r\n", server.stat_total_sync_bypass_crc, | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
What is the use of this metric? What are end users supposed to do with this information? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This metric is merely for testing purposes There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We generally prefer using log lines for testing purposes, since these info fields are part of our public facing contract and we can't change them. |
||||||
"sync_full:%lld\r\n", server.stat_sync_full, | ||||||
"sync_partial_ok:%lld\r\n", server.stat_sync_partial_ok, | ||||||
"sync_partial_err:%lld\r\n", server.stat_sync_partial_err, | ||||||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -447,6 +447,9 @@ static ConnectionType CT_Socket = { | |||||
.process_pending_data = NULL, | ||||||
.postpone_update_state = NULL, | ||||||
.update_state = NULL, | ||||||
|
||||||
/* Miscellaneous */ | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
.connIntegrityChecked = NULL, | ||||||
}; | ||||||
|
||||||
int connBlock(connection *conn) { | ||||||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -814,6 +814,10 @@ static int connTLSListen(connListener *listener) { | |||||
return listenToPort(listener); | ||||||
} | ||||||
|
||||||
static int connTLSIsIntegrityChecked(void) { | ||||||
return 1; | ||||||
} | ||||||
|
||||||
static void connTLSCloseListener(connListener *listener) { | ||||||
connectionTypeTcp()->closeListener(listener); | ||||||
} | ||||||
|
@@ -1186,6 +1190,9 @@ static ConnectionType CT_TLS = { | |||||
|
||||||
/* TLS specified methods */ | ||||||
.get_peer_cert = connTLSGetPeerCert, | ||||||
|
||||||
/* Miscellaneous */ | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
.connIntegrityChecked = connTLSIsIntegrityChecked, | ||||||
}; | ||||||
|
||||||
int RedisRegisterConnectionTypeTLS(void) { | ||||||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -207,6 +207,9 @@ static ConnectionType CT_Unix = { | |||||
.process_pending_data = NULL, | ||||||
.postpone_update_state = NULL, | ||||||
.update_state = NULL, | ||||||
|
||||||
/* Miscellaneous */ | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
.connIntegrityChecked = NULL, | ||||||
}; | ||||||
|
||||||
int RedisRegisterConnectionTypeUnix(void) { | ||||||
|
Original file line number | Diff line number | Diff line change | ||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|
@@ -0,0 +1,37 @@ | ||||||||||||
start_server {tags {"repl tls"} overrides {save {}}} { | ||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We only need to run these tests with TLS enabled, otherwise this condition is always false. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Correct. Shouldn't we have tests to check that without TLS we do not skip CRC? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can't we just have one test for that though? do we really need the 6 that we have? |
||||||||||||
set primary [srv 0 client] | ||||||||||||
set primary_host [srv 0 host] | ||||||||||||
set primary_port [srv 0 port] | ||||||||||||
set primary_bypassed_crc_counter 0 | ||||||||||||
foreach mds {no yes} { | ||||||||||||
foreach sdl {disabled on-empty-db swapdb flush-before-load} { | ||||||||||||
Comment on lines
+6
to
+7
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't know what either mds or sdl stand for, can we use more normal names? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I used the naming used in other tests I saw just to stay with format. Will change it |
||||||||||||
test "Bypass CRC sync - tls:$::tls, repl_diskless_sync:$mds, repl_diskless_load:$sdl" { | ||||||||||||
$primary config set repl-diskless-sync $mds | ||||||||||||
start_server {overrides {save {}}} { | ||||||||||||
set replica [srv 0 client] | ||||||||||||
$replica config set repl-diskless-load $sdl | ||||||||||||
$replica replicaof $primary_host $primary_port | ||||||||||||
|
||||||||||||
wait_for_condition 50 100 { | ||||||||||||
[string match {*master_link_status:up*} [$replica info replication]] | ||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||
} else { | ||||||||||||
fail "Replication not started" | ||||||||||||
} | ||||||||||||
|
||||||||||||
set replica_bypassing_crc_count [string match {*total_sync_bypass_crc:1*} [$replica info stats]] | ||||||||||||
set stats [regexp -inline {total_sync_bypass_crc:(\d+)} [$primary info stats]] | ||||||||||||
set primary_bypass_crc_count [lindex $stats 1] | ||||||||||||
Comment on lines
+21
to
+23
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||
|
||||||||||||
if {$sdl eq "disabled" || $mds eq "no" || !$::tls} { | ||||||||||||
assert_equal $primary_bypassed_crc_counter $primary_bypass_crc_count "Primary should not bypass CRC in this scenario" | ||||||||||||
assert_equal 0 $replica_bypassing_crc_count "Replica should not bypass CRC in this scenario" | ||||||||||||
} else { | ||||||||||||
incr primary_bypassed_crc_counter | ||||||||||||
assert_equal $primary_bypassed_crc_counter $primary_bypass_crc_count "Primary should bypass CRC in this scenario" | ||||||||||||
assert_equal 1 $replica_bypassing_crc_count "Replica should bypass CRC in this scenario" | ||||||||||||
} | ||||||||||||
} | ||||||||||||
} | ||||||||||||
} | ||||||||||||
} | ||||||||||||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This comment is just describing the next line, which seems very self explanatory to me.