Skip to content

Commit

Permalink
retry extensions: implement "other priority" extension (envoyproxy#4529)
Browse files Browse the repository at this point in the history
Implements a RetryPriority which will keep track of attempted
priorities and attempt to route retry requests to other priorities. The
update frequency is configurable, allowing multiple requests to hit each
priority if desired.

As a fallback, when no healthy priorities remain, the list of attempted
priorities will be reset and a host will be selected again using the
original priority load.

Extracts out the recalculatePerPriorityState from LoadBalancerBase to
recompute the priority load with the same code used by the LB.

Signed-off-by: Snow Pettersen [email protected]

Risk Level: Medium, new extension
Testing: unit tests
Docs Changes: n/a
Release Notes: n/a
Signed-off-by: Snow Pettersen <[email protected]>
  • Loading branch information
snowp authored and alyssawilk committed Oct 4, 2018
1 parent 3e2eff4 commit ba5d3f0
Show file tree
Hide file tree
Showing 19 changed files with 565 additions and 27 deletions.
11 changes: 11 additions & 0 deletions api/envoy/config/retry/other_priority/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
licenses(["notice"]) # Apache 2

load("//bazel:api_build_system.bzl", "api_proto_library_internal")

# Proto library target for the config message defined in other_priority_config.proto.
# NOTE(review): other_priority_config.proto contains no import statements, so the
# "//envoy/api/v2/core:base" dep looks unused -- confirm before removing.
api_proto_library_internal(
name = "other_priority",
srcs = ["other_priority_config.proto"],
deps = [
"//envoy/api/v2/core:base",
],
)
37 changes: 37 additions & 0 deletions api/envoy/config/retry/other_priority/other_priority_config.proto
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
syntax = "proto3";

package envoy.config.retry.other_priority;

// A retry host selector that attempts to spread retries between priorities, even if certain
// priorities would not normally be attempted due to higher priorities being available.
//
// As priorities get excluded, load will be distributed amongst the remaining healthy priorities
// based on the relative health of the priorities, matching how load is distributed during regular
// host selection. For example, given priority healths of {100, 50, 50}, the original load will be
// {100, 0, 0} (since P0 has capacity to handle 100% of the traffic). If P0 is excluded, the load
// changes to {0, 50, 50}, because P1 is only able to handle 50% of the traffic, causing the
// remainder to spill over to P2.
//
// Each priority attempted will be excluded until there are no healthy priorities left, at which
// point the list of attempted priorities will be reset, essentially starting from the beginning.
// For example, given three priorities P0, P1, P2 with healthy % of 100, 0 and 50 respectively, the
// following sequence of priorities would be selected (assuming update_frequency = 1):
// Attempt 1: P0 (P0 is 100% healthy)
// Attempt 2: P2 (P0 already attempted, P2 only healthy priority)
// Attempt 3: P0 (no healthy priorities, reset)
// Attempt 4: P2
//
// Using this RetryPriority requires rebuilding the priority load, which runs in O(# of
// priorities), which might incur significant overhead for clusters with many priorities.
message OtherPriorityConfig {
// How often the priority load should be updated based on previously attempted priorities. Useful
// to allow each priority to receive more than one request before being excluded or to reduce
// the number of times that the priority load has to be recomputed.
//
// For example, by setting this to 2, then the first two attempts (initial attempt and first
// retry) will use the unmodified priority load. The third and fourth attempt will use priority
// load which excludes the priorities routed to with the first two attempts, and the fifth and
// sixth attempt will use the priority load excluding the priorities used for the first four
// attempts.
int32 update_frequency = 1;
}
4 changes: 4 additions & 0 deletions include/envoy/upstream/retry.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,8 @@ class RetryPriorityFactory {
const Protobuf::Message& config, uint32_t retry_count) PURE;

virtual std::string name() const PURE;

virtual ProtobufTypes::MessagePtr createEmptyConfigProto() PURE;
};

/**
Expand All @@ -125,6 +127,8 @@ class RetryHostPredicateFactory {
* @return name name of this factory.
*/
virtual std::string name() PURE;

virtual ProtobufTypes::MessagePtr createEmptyConfigProto() PURE;
};

} // namespace Upstream
Expand Down
16 changes: 11 additions & 5 deletions source/common/router/config_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -53,15 +53,21 @@ RetryPolicyImpl::RetryPolicyImpl(const envoy::api::v2::route::RouteAction& confi
retry_on_ |= RetryStateImpl::parseRetryGrpcOn(config.retry_policy().retry_on());

for (const auto& host_predicate : config.retry_policy().retry_host_predicate()) {
Registry::FactoryRegistry<Upstream::RetryHostPredicateFactory>::getFactory(
host_predicate.name())
->createHostPredicate(*this, host_predicate.config(), num_retries_);
auto& factory =
::Envoy::Config::Utility::getAndCheckFactory<Upstream::RetryHostPredicateFactory>(
host_predicate.name());

auto config = ::Envoy::Config::Utility::translateToFactoryConfig(host_predicate, factory);
factory.createHostPredicate(*this, *config, num_retries_);
}

const auto retry_priority = config.retry_policy().retry_priority();
if (!retry_priority.name().empty()) {
Registry::FactoryRegistry<Upstream::RetryPriorityFactory>::getFactory(retry_priority.name())
->createRetryPriority(*this, retry_priority.config(), num_retries_);
auto& factory = ::Envoy::Config::Utility::getAndCheckFactory<Upstream::RetryPriorityFactory>(
retry_priority.name());

auto config = ::Envoy::Config::Utility::translateToFactoryConfig(retry_priority, factory);
factory.createRetryPriority(*this, *config, num_retries_);
}

auto host_selection_attempts = config.retry_policy().host_selection_retry_max_attempts();
Expand Down
44 changes: 24 additions & 20 deletions source/common/upstream/load_balancer_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -44,25 +44,29 @@ LoadBalancerBase::LoadBalancerBase(const PrioritySet& priority_set, ClusterStats
common_config, healthy_panic_threshold, 100, 50)),
priority_set_(priority_set) {
for (auto& host_set : priority_set_.hostSetsPerPriority()) {
recalculatePerPriorityState(host_set->priority());
recalculatePerPriorityState(host_set->priority(), priority_set_, per_priority_load_,
per_priority_health_);
}
priority_set_.addMemberUpdateCb(
[this](uint32_t priority, const HostVector&, const HostVector&) -> void {
recalculatePerPriorityState(priority);
});
priority_set_.addMemberUpdateCb([this](uint32_t priority, const HostVector&,
const HostVector&) -> void {
recalculatePerPriorityState(priority, priority_set_, per_priority_load_, per_priority_health_);
});
}

void LoadBalancerBase::recalculatePerPriorityState(uint32_t priority) {
per_priority_load_.resize(priority_set_.hostSetsPerPriority().size());
per_priority_health_.resize(priority_set_.hostSetsPerPriority().size());
void LoadBalancerBase::recalculatePerPriorityState(uint32_t priority,
const PrioritySet& priority_set,
PriorityLoad& per_priority_load,
std::vector<uint32_t>& per_priority_health) {
per_priority_load.resize(priority_set.hostSetsPerPriority().size());
per_priority_health.resize(priority_set.hostSetsPerPriority().size());

// Determine the health of the newly modified priority level.
// Health ranges from 0-100, and is the ratio of healthy hosts to total hosts, modified by the
// overprovisioning factor.
HostSet& host_set = *priority_set_.hostSetsPerPriority()[priority];
per_priority_health_[priority] = 0;
HostSet& host_set = *priority_set.hostSetsPerPriority()[priority];
per_priority_health[priority] = 0;
if (host_set.hosts().size() > 0) {
per_priority_health_[priority] =
per_priority_health[priority] =
std::min<uint32_t>(100, (host_set.overprovisioning_factor() *
host_set.healthyHosts().size() / host_set.hosts().size()));
}
Expand All @@ -74,32 +78,32 @@ void LoadBalancerBase::recalculatePerPriorityState(uint32_t priority) {
// 3 host sets with 20% / 20% / 10% health they will get 40% / 40% / 20% load to ensure total load
// adds up to 100.
const uint32_t total_health = std::min<uint32_t>(
std::accumulate(per_priority_health_.begin(), per_priority_health_.end(), 0), 100);
std::accumulate(per_priority_health.begin(), per_priority_health.end(), 0), 100);
if (total_health == 0) {
// Everything is terrible. Send all load to P=0.
// In this one case sumEntries(per_priority_load_) != 100 since we sinkhole all traffic in P=0.
per_priority_load_[0] = 100;
per_priority_load[0] = 100;
return;
}

size_t total_load = 100;
int32_t first_healthy_priority = -1;
for (size_t i = 0; i < per_priority_health_.size(); ++i) {
if (first_healthy_priority < 0 && per_priority_health_[i] > 0) {
for (size_t i = 0; i < per_priority_health.size(); ++i) {
if (first_healthy_priority < 0 && per_priority_health[i] > 0) {
first_healthy_priority = i;
}
// Now assign as much load as possible to the high priority levels and cease assigning load
// when total_load runs out.
per_priority_load_[i] =
std::min<uint32_t>(total_load, per_priority_health_[i] * 100 / total_health);
total_load -= per_priority_load_[i];
per_priority_load[i] =
std::min<uint32_t>(total_load, per_priority_health[i] * 100 / total_health);
total_load -= per_priority_load[i];
}

if (total_load != 0) {
ASSERT(first_healthy_priority != -1);
// Account for rounding errors by assigning it to the first healthy priority.
ASSERT(total_load < per_priority_load_.size());
per_priority_load_[first_healthy_priority] += total_load;
ASSERT(total_load < per_priority_load.size());
per_priority_load[first_healthy_priority] += total_load;
}
}

Expand Down
8 changes: 6 additions & 2 deletions source/common/upstream/load_balancer_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,11 +70,15 @@ class LoadBalancerBase : public LoadBalancer {
// The priority-ordered set of hosts to use for load balancing.
const PrioritySet& priority_set_;

public:
// Called when a host set at the given priority level is updated. This updates
// per_priority_health_ for that priority level, and may update per_priority_load_ for all
// per_priority_health for that priority level, and may update per_priority_load for all
// priority levels.
void recalculatePerPriorityState(uint32_t priority);
void static recalculatePerPriorityState(uint32_t priority, const PrioritySet& priority_set,
PriorityLoad& priority_load,
std::vector<uint32_t>& per_priority_health);

protected:
// The percentage load (0-100) for each priority level
std::vector<uint32_t> per_priority_load_;
// The health (0-100) for each priority level.
Expand Down
3 changes: 3 additions & 0 deletions source/extensions/extensions_build_config.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,9 @@ EXTENSIONS = {

# Retry host predicates
"envoy.retry_host_predicates.other_hosts": "//source/extensions/retry/host/other_hosts:config",

# Retry priorities
"envoy.retry_priorities.previous_priorities": "//source/extensions/retry/priority/other_priority:config",
}

WINDOWS_EXTENSIONS = {
Expand Down
4 changes: 4 additions & 0 deletions source/extensions/retry/host/other_hosts/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@ class OtherHostsRetryPredicateFactory : public Upstream::RetryHostPredicateFacto
}

std::string name() override { return RetryHostPredicateValues::get().PreviousHostsPredicate; }

ProtobufTypes::MessagePtr createEmptyConfigProto() override {
return ProtobufTypes::MessagePtr{new Envoy::ProtobufWkt::Empty()};
}
};

} // namespace Host
Expand Down
17 changes: 17 additions & 0 deletions source/extensions/retry/priority/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
licenses(["notice"]) # Apache 2

load(
"//bazel:envoy_build_system.bzl",
"envoy_cc_library",
"envoy_package",
)

envoy_package()

# Library target that exposes well_known_names.h (headers only, no srcs).
envoy_cc_library(
name = "well_known_names",
hdrs = ["well_known_names.h"],
deps = [
"//source/common/singleton:const_singleton",
],
)
33 changes: 33 additions & 0 deletions source/extensions/retry/priority/other_priority/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
licenses(["notice"]) # Apache 2

load(
"//bazel:envoy_build_system.bzl",
"envoy_cc_library",
"envoy_package",
)

envoy_package()

# Library built from other_priority.cc/.h; depended on by the ":config" target below.
envoy_cc_library(
name = "other_priority_lib",
srcs = ["other_priority.cc"],
hdrs = ["other_priority.h"],
deps = [
"//include/envoy/upstream:retry_interface",
"//source/common/upstream:load_balancer_lib",
],
)

# Factory and registration code for the extension (config.cc/.h). This is the target that
# extensions_build_config.bzl references for "envoy.retry_priorities.previous_priorities".
envoy_cc_library(
name = "config",
srcs = ["config.cc"],
hdrs = ["config.h"],
deps = [
":other_priority_lib",
"//include/envoy/registry",
"//include/envoy/upstream:retry_interface",
"//source/common/protobuf",
"//source/extensions/retry/priority:well_known_names",
"@envoy_api//envoy/config/retry/other_priority:other_priority_cc",
],
)
28 changes: 28 additions & 0 deletions source/extensions/retry/priority/other_priority/config.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#include "extensions/retry/priority/other_priority/config.h"

#include "envoy/config/retry/other_priority/other_priority_config.pb.validate.h"
#include "envoy/registry/registry.h"
#include "envoy/upstream/retry.h"

namespace Envoy {
namespace Extensions {
namespace Retry {
namespace Priority {

// Validates the supplied config as an OtherPriorityConfig and hands a newly created
// OtherPriorityRetryPriority, built from its update_frequency and max_retries, to the callbacks.
void OtherPriorityRetryPriorityFactory::createRetryPriority(
    Upstream::RetryPriorityFactoryCallbacks& callbacks, const Protobuf::Message& config,
    uint32_t max_retries) {
  // Downcast first so the typed message can be referred to by name below.
  const auto& typed_config = MessageUtil::downcastAndValidate<
      const envoy::config::retry::other_priority::OtherPriorityConfig&>(config);

  auto retry_priority =
      std::make_shared<OtherPriorityRetryPriority>(typed_config.update_frequency(), max_retries);
  callbacks.addRetryPriority(retry_priority);
}

// Static Registry::RegisterFactory instance for OtherPriorityRetryPriorityFactory under the
// Upstream::RetryPriorityFactory category. Presumably this registers the factory during static
// initialization so it can be looked up by name -- confirm against Registry::RegisterFactory.
static Registry::RegisterFactory<OtherPriorityRetryPriorityFactory, Upstream::RetryPriorityFactory>
register_;

} // namespace Priority
} // namespace Retry
} // namespace Extensions
} // namespace Envoy
32 changes: 32 additions & 0 deletions source/extensions/retry/priority/other_priority/config.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#pragma once

#include "envoy/config/retry/other_priority/other_priority_config.pb.validate.h"
#include "envoy/upstream/retry.h"

#include "common/protobuf/protobuf.h"

#include "extensions/retry/priority/other_priority/other_priority.h"
#include "extensions/retry/priority/well_known_names.h"

namespace Envoy {
namespace Extensions {
namespace Retry {
namespace Priority {

/**
 * Upstream::RetryPriorityFactory implementation whose name() is
 * RetryPriorityValues::PreviousPrioritiesRetryPriority.
 */
class OtherPriorityRetryPriorityFactory : public Upstream::RetryPriorityFactory {
public:
  // Adds a retry priority built from config to callbacks (defined in config.cc).
  // @param callbacks receives the created retry priority.
  // @param config factory config; config.cc downcasts it to OtherPriorityConfig.
  // @param max_retries forwarded to the created retry priority.
  void createRetryPriority(Upstream::RetryPriorityFactoryCallbacks& callbacks,
                           const Protobuf::Message& config, uint32_t max_retries) override;

  // @return the well-known name of this factory.
  std::string name() const override {
    return RetryPriorityValues::get().PreviousPrioritiesRetryPriority;
  }

  // @return an empty OtherPriorityConfig for the caller to populate.
  // This must be the same message type that createRetryPriority() downcasts its config argument
  // to; returning an unrelated type (such as ProtobufWkt::Empty) makes that downcast fail.
  ProtobufTypes::MessagePtr createEmptyConfigProto() override {
    return ProtobufTypes::MessagePtr(
        new envoy::config::retry::other_priority::OtherPriorityConfig());
  }
};

} // namespace Priority
} // namespace Retry
} // namespace Extensions
} // namespace Envoy
Loading

0 comments on commit ba5d3f0

Please sign in to comment.