Skip to content

Commit

Permalink
Merge pull request grpc#5733 from ctiller/dns_backoff
Browse files Browse the repository at this point in the history
Exponential backoff for DNS retries
  • Loading branch information
jtattermusch committed Mar 22, 2016
2 parents 3271dbc + 0f30cd8 commit fe279af
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 8 deletions.
27 changes: 19 additions & 8 deletions src/core/client_config/resolvers/dns_resolver.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,14 @@
#include "src/core/client_config/lb_policy_registry.h"
#include "src/core/iomgr/resolve_address.h"
#include "src/core/iomgr/timer.h"
#include "src/core/support/backoff.h"
#include "src/core/support/string.h"

#define BACKOFF_MULTIPLIER 1.6
#define BACKOFF_JITTER 0.2
#define BACKOFF_MIN_SECONDS 1
#define BACKOFF_MAX_SECONDS 120

typedef struct {
/** base class: must be first */
grpc_resolver base;
Expand Down Expand Up @@ -75,6 +81,8 @@ typedef struct {
/** retry timer */
bool have_retry_timer;
grpc_timer retry_timer;
/** retry backoff state */
gpr_backoff backoff_state;
} dns_resolver;

static void dns_destroy(grpc_exec_ctx *exec_ctx, grpc_resolver *r);
Expand Down Expand Up @@ -111,6 +119,7 @@ static void dns_channel_saw_error(grpc_exec_ctx *exec_ctx,
dns_resolver *r = (dns_resolver *)resolver;
gpr_mu_lock(&r->mu);
if (!r->resolving) {
gpr_backoff_reset(&r->backoff_state);
dns_start_resolving_locked(r);
}
gpr_mu_unlock(&r->mu);
Expand All @@ -125,6 +134,7 @@ static void dns_next(grpc_exec_ctx *exec_ctx, grpc_resolver *resolver,
r->next_completion = on_complete;
r->target_config = target_config;
if (r->resolved_version == 0 && !r->resolving) {
gpr_backoff_reset(&r->backoff_state);
dns_start_resolving_locked(r);
} else {
dns_maybe_finish_next_locked(exec_ctx, r);
Expand Down Expand Up @@ -185,17 +195,16 @@ static void dns_on_resolved(grpc_exec_ctx *exec_ctx, void *arg,
grpc_resolved_addresses_destroy(addresses);
gpr_free(subchannels);
} else {
int retry_seconds = 15;
gpr_log(GPR_DEBUG, "dns resolution failed: retrying in %d seconds",
retry_seconds);
gpr_timespec now = gpr_now(GPR_CLOCK_MONOTONIC);
gpr_timespec next_try = gpr_backoff_step(&r->backoff_state, now);
gpr_timespec timeout = gpr_time_sub(next_try, now);
gpr_log(GPR_DEBUG, "dns resolution failed: retrying in %d.%09d seconds",
timeout.tv_sec, timeout.tv_nsec);
GPR_ASSERT(!r->have_retry_timer);
r->have_retry_timer = true;
gpr_timespec now = gpr_now(GPR_CLOCK_MONOTONIC);
GRPC_RESOLVER_REF(&r->base, "retry-timer");
grpc_timer_init(
exec_ctx, &r->retry_timer,
gpr_time_add(now, gpr_time_from_seconds(retry_seconds, GPR_TIMESPAN)),
dns_on_retry_timer, r, now);
grpc_timer_init(exec_ctx, &r->retry_timer, next_try, dns_on_retry_timer, r,
now);
}
if (r->resolved_config) {
grpc_client_config_unref(exec_ctx, r->resolved_config);
Expand Down Expand Up @@ -263,6 +272,8 @@ static grpc_resolver *dns_create(grpc_resolver_args *args,
r->name = gpr_strdup(path);
r->default_port = gpr_strdup(default_port);
r->subchannel_factory = args->subchannel_factory;
gpr_backoff_init(&r->backoff_state, BACKOFF_MULTIPLIER, BACKOFF_JITTER,
BACKOFF_MIN_SECONDS * 1000, BACKOFF_MAX_SECONDS * 1000);
grpc_subchannel_factory_ref(r->subchannel_factory);
r->lb_policy_name = gpr_strdup(lb_policy_name);
return &r->base;
Expand Down
5 changes: 5 additions & 0 deletions src/core/support/backoff.c
Original file line number Diff line number Diff line change
Expand Up @@ -69,3 +69,8 @@ gpr_timespec gpr_backoff_step(gpr_backoff *backoff, gpr_timespec now) {
return gpr_time_add(
now, gpr_time_from_millis(backoff->current_timeout_millis, GPR_TIMESPAN));
}

void gpr_backoff_reset(gpr_backoff *backoff) {
// forces step() to return a timeout of min_timeout_millis
backoff->current_timeout_millis = 0;
}
3 changes: 3 additions & 0 deletions src/core/support/backoff.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,5 +61,8 @@ void gpr_backoff_init(gpr_backoff *backoff, double multiplier, double jitter,
gpr_timespec gpr_backoff_begin(gpr_backoff *backoff, gpr_timespec now);
/// Step a retry loop: returns a timespec for the NEXT retry
gpr_timespec gpr_backoff_step(gpr_backoff *backoff, gpr_timespec now);
/// Reset the backoff, so the next gpr_backoff_step will be a gpr_backoff_begin
/// instead
void gpr_backoff_reset(gpr_backoff *backoff);

#endif /* GRPC_CORE_SUPPORT_BACKOFF_H */

0 comments on commit fe279af

Please sign in to comment.