Skip to content

Commit

Permalink
xds: WRR rr_fallback should trigger with one endpoint weight
Browse files Browse the repository at this point in the history
From gRFC A58:
> When less than two subchannels have load info, all subchannels will
> get the same weight and the policy will behave the same as round_robin
  • Loading branch information
ejona86 committed Jul 25, 2024
1 parent b108ed3 commit 786523d
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -598,13 +598,15 @@ static final class StaticStrideScheduler {
if (numWeightedChannels > 0) {
unscaledMeanWeight = sumWeight / numWeightedChannels;
unscaledMaxWeight = Math.min(unscaledMaxWeight, (float) (K_MAX_RATIO * unscaledMeanWeight));
usesRoundRobin = false;
} else {
// Fall back to round robin if all values are non-positives
usesRoundRobin = true;
// Fall back to round robin if all values are non-positives. Note that
// numWeightedChannels == 1 also behaves like RR because the weights are all the same, but
// the weights aren't 1, so it doesn't go through this path.
unscaledMeanWeight = 1;
unscaledMaxWeight = 1;
}
// We need at least two weights for WRR to be distinguishable from round_robin.
usesRoundRobin = numWeightedChannels < 2;

// Scales weights s.t. max(weights) == K_MAX_WEIGHT, meanWeight is scaled accordingly.
// Note that, since we cap the weights to stay within K_MAX_RATIO, meanWeight might not
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1190,13 +1190,17 @@ public void metrics() {
verifyLongCounterRecord("grpc.lb.wrr.endpoint_weight_not_yet_usable", 1, 2);
verifyLongCounterRecord("grpc.lb.wrr.endpoint_weight_not_yet_usable", 1, 3);

// Send each child LB state an ORCA update with some valid utilization/qps data so that weights
// can be calculated.
// Send one child LB state an ORCA update with some valid utilization/qps data so that weights
// can be calculated, but it's still essentially round_robin
Iterator<ChildLbState> childLbStates = wrr.getChildLbStates().iterator();
((WeightedChildLbState)childLbStates.next()).new OrcaReportListener(
weightedConfig.errorUtilizationPenalty).onLoadReport(
InternalCallMetricRecorder.createMetricReport(0.1, 0, 0.1, 1, 0, new HashMap<>(),
new HashMap<>(), new HashMap<>()));

fakeClock.forwardTime(1, TimeUnit.SECONDS);

// Now send a second child LB state an ORCA update, so there are real weights
((WeightedChildLbState)childLbStates.next()).new OrcaReportListener(
weightedConfig.errorUtilizationPenalty).onLoadReport(
InternalCallMetricRecorder.createMetricReport(0.1, 0, 0.1, 1, 0, new HashMap<>(),
Expand All @@ -1210,9 +1214,15 @@ public void metrics() {
// weights were updated
reset(mockMetricRecorder);

// We go forward in time past the default 10s blackout period before weights can be considered
// for wrr. The weights would get updated as the default update interval is 1s.
fakeClock.forwardTime(11, TimeUnit.SECONDS);
// We go forward in time past the default 10s blackout period for the first child. The weights
// would get updated as the default update interval is 1s.
fakeClock.forwardTime(9, TimeUnit.SECONDS);

verifyLongCounterRecord("grpc.lb.wrr.rr_fallback", 1, 1);

// And after another second the other children have weights
reset(mockMetricRecorder);
fakeClock.forwardTime(1, TimeUnit.SECONDS);

// Since we have weights on all the child LB states, the weight update should not result in
// further rr_fallback metric entries.
Expand Down

0 comments on commit 786523d

Please sign in to comment.