Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

805 ping interval #835

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ Bob versions changelog
- Added mimalloc allocator for musl target (#688)
- Added jemalloc-profile for memory profiling (#797)
- Proper support for GetSource::ALL requests (#723)
- Added check_timeout config option for node check requests (#805)

#### Changed
- BobClient clone overhead reduced (#774)
Expand Down
6 changes: 5 additions & 1 deletion bob-apps/bin/bobd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,11 @@ async fn run_server<A: Authenticator>(node: NodeConfig, authenticator: A, mapper
tls_domain_name: tls_config.domain_name.clone(),
}
});
let factory = Factory::new(node.operation_timeout(), metrics, node.name().into(), factory_tls_config);
let factory = Factory::new(node.operation_timeout(),
node.check_timeout(),
metrics,
node.name().into(),
factory_tls_config);

let mut server_builder = Server::builder();
if let Some(node_tls_config) = node.tls_config().as_ref().and_then(|tls_config| tls_config.grpc_config()) {
Expand Down
1 change: 1 addition & 0 deletions bob-backend/src/pearl/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ users_config: users.yaml
name: local_node
quorum: 1
operation_timeout: 3sec
check_timeout: 3sec
check_interval: 5000ms
cluster_policy: quorum # quorum
backend_type: pearl # in_memory, stub, pearl
Expand Down
26 changes: 19 additions & 7 deletions bob-common/src/bob_client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ pub mod b_client {
local_node_name: NodeName,

operation_timeout: Duration,
check_timeout: Duration,
auth_header: String,
metrics: BobClientMetrics,
}
Expand All @@ -42,6 +43,7 @@ pub mod b_client {
pub async fn create(
node: &Node,
operation_timeout: Duration,
check_timeout: Duration,
metrics: BobClientMetrics,
local_node_name: NodeName,
tls_config: Option<&FactoryTlsConfig>,
Expand All @@ -66,11 +68,12 @@ pub mod b_client {
Ok(Self {
client,
target_node_name: node.name().clone(),
target_node_address: node.address().to_owned(),
local_node_name: local_node_name,
operation_timeout: operation_timeout,
auth_header: auth_header,
metrics: metrics
target_node_address: node.address().to_owned(),
local_node_name,
operation_timeout,
check_timeout,
auth_header,
metrics
})
}

Expand Down Expand Up @@ -154,7 +157,7 @@ pub mod b_client {
let mut req = Request::new(Null {});
self.set_credentials(&mut req);
self.set_node_name(&mut req);
self.set_timeout(&mut req);
self.set_check_timeout(&mut req);

let node_name = self.target_node_name.to_owned();
let mut client = self.client.clone();
Expand Down Expand Up @@ -242,11 +245,16 @@ pub mod b_client {
/// Applies this client's `operation_timeout` to the given outgoing request.
fn set_timeout<T>(&self, request: &mut Request<T>) {
    let timeout = self.operation_timeout;
    request.set_timeout(timeout);
}

/// Applies this client's `check_timeout` (rather than `operation_timeout`)
/// to the given outgoing request.
fn set_check_timeout<T>(&self, request: &mut Request<T>) {
    let timeout = self.check_timeout;
    request.set_timeout(timeout);
}
}

mock! {
pub BobClient {
pub async fn create<'a>(node: &Node, operation_timeout: Duration, metrics: BobClientMetrics, local_node_name: NodeName, tls_config: Option<&'a FactoryTlsConfig>) -> Result<Self, String>;
pub async fn create<'a>(node: &Node, operation_timeout: Duration, check_timeout: Duration,
metrics: BobClientMetrics, local_node_name: NodeName, tls_config: Option<&'a FactoryTlsConfig>) -> Result<Self, String>;
pub async fn put(&self, key: BobKey, d: BobData, options: PutOptions) -> PutResult;
pub async fn get(&self, key: BobKey, options: GetOptions) -> GetResult;
pub async fn ping(&self) -> PingResult;
Expand Down Expand Up @@ -314,6 +322,7 @@ pub struct FactoryTlsConfig {
#[derive(Clone)]
pub struct Factory {
operation_timeout: Duration,
check_timeout: Duration,
metrics: Arc<dyn MetricsContainerBuilder + Send + Sync>,
local_node_name: NodeName,
tls_config: Option<FactoryTlsConfig>,
Expand All @@ -324,12 +333,14 @@ impl Factory {
#[must_use]
pub fn new(
operation_timeout: Duration,
check_timeout: Duration,
metrics: Arc<dyn MetricsContainerBuilder + Send + Sync>,
local_node_name: NodeName,
tls_config: Option<FactoryTlsConfig>,
) -> Self {
Factory {
operation_timeout,
check_timeout,
metrics,
local_node_name,
tls_config,
Expand All @@ -340,6 +351,7 @@ impl Factory {
BobClient::create(
node,
self.operation_timeout,
self.check_timeout,
metrics,
self.local_node_name.clone(),
self.tls_config.as_ref(),
Expand Down
13 changes: 13 additions & 0 deletions bob-common/src/configs/cluster_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -509,6 +509,7 @@ users_config: users.yaml
name: no
quorum: 1
operation_timeout: 12h 5min 2ns
check_timeout: 12h 5min 2ns
check_interval: 100ms
cluster_policy: quorum # quorum
backend_type: stub
Expand All @@ -526,6 +527,7 @@ users_config: users.yaml
name: no
quorum: 1
operation_timeout: 12h 5min 2ns
check_timeout: 12h 5min 2ns
check_interval: 100ms
cluster_policy: quorum # quorum
backend_type: pearl
Expand All @@ -543,6 +545,7 @@ users_config: users.yaml
name: no
quorum: 1
operation_timeout: 12h 5min 2ns
check_timeout: 12h 5min 2ns
check_interval: 100ms
cluster_policy: quorum # quorum
backend_type: pearl
Expand Down Expand Up @@ -571,6 +574,7 @@ users_config: users.yaml
name: no
quorum: 1
operation_timeout: 12h 5min 2ns
check_timeout: 12h 5min 2ns
check_interval: 100ms
cluster_policy: quorum # quorum
backend_type: pearl
Expand Down Expand Up @@ -599,6 +603,7 @@ users_config: users.yaml
name: no
quorum: 1
operation_timeout: 12h 5min 2ns
check_timeout: 12h 5min 2ns
check_interval: 100ms
cluster_policy: quorum # quorum
backend_type: pearl
Expand Down Expand Up @@ -627,6 +632,7 @@ users_config: users.yaml
name: no
quorum: 1
operation_timeout: 12h 5min 2ns
check_timeout: 12h 5min 2ns
check_interval: 100ms
cluster_policy: quorum # quorum
backend_type: pearl
Expand Down Expand Up @@ -655,6 +661,7 @@ users_config: users.yaml
name: n1
quorum: 1
operation_timeout: 12h 5min 2ns
check_timeout: 12h 5min 2ns
check_interval: 100sec
cluster_policy: quorum # quorum
backend_type: InvalidType
Expand Down Expand Up @@ -690,6 +697,7 @@ users_config: users.yaml
name: no
quorum: 1
operation_timeout: 12h 5min 2ns
check_timeout: 12h 5min 2ns
check_interval: 100mms
cluster_policy: quorum # quorum
backend_type: stub
Expand All @@ -707,6 +715,7 @@ users_config: users.yaml
name: n1
quorum: 1
operation_timeout: 12h 5min 2ns
check_timeout: 12h 5min 2ns
check_interval: 100sec
cluster_policy: quorum # quorum
backend_type: stub
Expand Down Expand Up @@ -741,6 +750,7 @@ users_config: users.yaml
name: 1n2112321321321321
quorum: 1
operation_timeout: 12h 5min 2ns
check_timeout: 12h 5min 2ns
check_interval: 100sec
cluster_policy: quorum # quorum
backend_type: stub
Expand Down Expand Up @@ -775,6 +785,7 @@ users_config: users.yaml
name: no
quorum: 1
operation_timeout: 12h 5min 2ns
check_timeout: 12h 5min 2ns
check_interval: 100ms
cluster_policy: quorum # quorum
backend_type: stub
Expand All @@ -798,6 +809,7 @@ users_config: users.yaml
name: no
quorum: 1
operation_timeout: 12h 5min 2ns
check_timeout: 12h 5min 2ns
check_interval: 100ms
cluster_policy: quorum # quorum
backend_type: stub
Expand All @@ -820,6 +832,7 @@ users_config: users.yaml
name: no
quorum: 1
operation_timeout: 12h 5min 2ns
check_timeout: 12h 5min 2ns
check_interval: 100ms
cluster_policy: quorum # quorum
backend_type: stub
Expand Down
15 changes: 15 additions & 0 deletions bob-common/src/configs/node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -556,6 +556,7 @@ pub struct Node {
name: String,
quorum: usize,
operation_timeout: String,
check_timeout: String,
check_interval: String,
#[serde(default = "NodeConfig::default_count_interval")]
count_interval: String,
Expand Down Expand Up @@ -657,6 +658,14 @@ impl NodeConfig {
.into()
}

/// Returns the timeout used for check requests.
///
/// The value is parsed from the humantime-formatted `check_timeout` config field.
///
/// # Panics
///
/// Panics if the stored string cannot be parsed as a humantime duration.
pub fn check_timeout(&self) -> Duration {
    let parsed: HumanDuration = self
        .check_timeout
        .parse()
        .expect("parse humantime duration");
    parsed.into()
}

pub fn check_interval(&self) -> Duration {
self.check_interval
.parse::<HumanDuration>()
Expand Down Expand Up @@ -806,6 +815,7 @@ impl NodeConfig {
Self::check_unset_single(&self.users_config, "users_config")?;
Self::check_unset_single(&self.name, "name")?;
Self::check_unset_single(&self.operation_timeout, "operation_timeout")?;
Self::check_unset_single(&self.check_timeout, "check_timeout")?;
Self::check_unset_single(&self.backend_type, "backend_type")?;
Ok(())
}
Expand Down Expand Up @@ -850,6 +860,7 @@ impl NodeConfig {
name: String::from(node_name),
quorum: 1,
operation_timeout: String::from("60sec"),
check_timeout: String::from("5sec"),
check_interval: String::from("5000ms"),
count_interval: NodeConfig::default_count_interval(),
cluster_policy: String::from("quorum"),
Expand Down Expand Up @@ -890,6 +901,9 @@ impl Validatable for NodeConfig {
self.operation_timeout.parse::<HumanDuration>().map_err(|e| {
format!("field 'timeout' for 'config' is not valid: {}", e)
})?;
// Name the actual field in the error so an invalid `check_timeout` is not
// reported with the same message as an invalid `operation_timeout`.
self.check_timeout.parse::<HumanDuration>().map_err(|e| {
    format!("field 'check_timeout' for 'config' is not valid: {}", e)
})?;
self.check_interval.parse::<HumanDuration>().map_err(|e| {
format!("field 'check_interval' for 'config' is not valid: {}", e)
})?;
Expand Down Expand Up @@ -924,6 +938,7 @@ pub mod tests {
name: name.to_string(),
quorum,
operation_timeout: "3sec".to_string(),
check_timeout: "1sec".to_string(),
check_interval: "3sec".to_string(),
cluster_policy: "quorum".to_string(),
backend_type: "in_memory".to_string(),
Expand Down
13 changes: 7 additions & 6 deletions bob/src/link_manager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,21 +57,22 @@ impl LinkManager {
interval.tick().await;
let mut err_cnt = 0;
let mut status = String::from("Node status: ");
for node in nodes.iter() {
if let Err(e) = node.check(&factory).await {
let mut futures :FuturesUnordered<_> = nodes.iter().map(|n| n.check(&factory) .map(|r| (n.name().clone(), r))).collect();
while let Some((name, res)) = futures.next().await {
if let Err(e) = res {
if log_in_this_iter {
error!(
"No connection to {}:[{}] - {}",
node.name(),
node.address(),
name,
nodes.iter().find(|n| n.name() == &name).unwrap().address(),
e
);
status += &format!("[-]{:<10} ", node.name());
status += &format!("[-]{:<10} ", name);
}
err_cnt += 1;
} else {
if log_in_this_iter {
status += &format!("[+]{:<10} ", node.name());
status += &format!("[+]{:<10} ", name);
}
}
}
Expand Down
1 change: 1 addition & 0 deletions compose_examples/one_node/configs/node.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ log_config: /configs/logger.yaml
name: node
quorum: 1
operation_timeout: 15sec
check_timeout: 3sec
check_interval: 5000ms
cluster_policy: quorum
backend_type: pearl
Expand Down
1 change: 1 addition & 0 deletions compose_examples/two_nodes/configs/node_0.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ log_config: /configs/logger.yaml
name: node_0
quorum: 2
operation_timeout: 15sec
check_timeout: 3sec
check_interval: 5000ms
cluster_policy: quorum
backend_type: pearl
Expand Down
1 change: 1 addition & 0 deletions compose_examples/two_nodes/configs/node_1.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ log_config: /configs/logger.yaml
name: node_1
quorum: 2
operation_timeout: 15sec
check_timeout: 3sec
check_interval: 5000ms
cluster_policy: quorum
backend_type: pearl
Expand Down
1 change: 1 addition & 0 deletions compose_examples/two_nodes_swarm/configs/node0.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ log_config: /configs/logger.yaml
name: node0
quorum: 2
operation_timeout: 15sec
check_timeout: 3sec
check_interval: 5000ms
cleanup_interval: 60s
cluster_policy: quorum
Expand Down
1 change: 1 addition & 0 deletions compose_examples/two_nodes_swarm/configs/node1.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ log_config: /configs/logger.yaml
name: node1
quorum: 2
operation_timeout: 15sec
check_timeout: 3sec
check_interval: 5000ms
cleanup_interval: 60s
cluster_policy: quorum
Expand Down
1 change: 1 addition & 0 deletions compose_examples/two_nodes_swarm/configs/node2.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ log_config: /configs/logger.yaml
name: node2
quorum: 2
operation_timeout: 15sec
check_timeout: 3sec
check_interval: 5000ms
cleanup_interval: 60s
cluster_policy: quorum
Expand Down
1 change: 1 addition & 0 deletions compose_examples/two_nodes_swarm/configs/node3.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ log_config: /configs/logger.yaml
name: node3
quorum: 2
operation_timeout: 15sec
check_timeout: 3sec
check_interval: 5000ms
cleanup_interval: 60s
cluster_policy: quorum
Expand Down
2 changes: 2 additions & 0 deletions config-examples/node.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ name: local_node
quorum: 1
# [time] timeout for every GRPC operation except node checks (see check_timeout)
operation_timeout: 3sec
# [time] timeout for node check operation
check_timeout: 3sec
# [time] interval for checking connections
check_interval: 5000ms
# [simple, quorum] simple - without checking status
Expand Down
2 changes: 2 additions & 0 deletions dockerfiles/default-configs/node.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ name: local_node
quorum: 1
# [time] timeout for every GRPC operation except node checks (see check_timeout)
operation_timeout: 3sec
# [time] timeout for node check operation
check_timeout: 3sec
# [time] interval for checking connections
check_interval: 5000ms
# [simple, quorum] simple - without checking status
Expand Down
3 changes: 2 additions & 1 deletion test_env/node1.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ users_config: /bob/config-examples/users.yaml
name: node1
quorum: 3
operation_timeout: 3sec
check_timeout: 3sec
check_interval: 5000ms
cleanup_interval: 5000ms
cluster_policy: quorum
Expand All @@ -27,4 +28,4 @@ metrics:
prefix: '{metrics_name}.{local_address}.{node_name}'
graphite: 127.0.0.1:2003
graphite_enabled: false
prometheus_enabled: false
prometheus_enabled: false
Loading