Skip to content

Commit

Permalink
Improve agent shutdown, with native sidecar support (istio#47226)
Browse files Browse the repository at this point in the history
* Improve agent shutdown, with native sidecar support

* Fix tests

* Add drain back
  • Loading branch information
howardjohn authored Oct 19, 2023
1 parent c87f515 commit 3639a4f
Show file tree
Hide file tree
Showing 34 changed files with 500 additions and 180 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,9 @@ metadata:
{{- end }}
}
spec:
{{- $holdProxy := or .ProxyConfig.HoldApplicationUntilProxyStarts.GetValue .Values.global.proxy.holdApplicationUntilProxyStarts }}
{{- $holdProxy := and
(or .ProxyConfig.HoldApplicationUntilProxyStarts.GetValue .Values.global.proxy.holdApplicationUntilProxyStarts)
(not $nativeSidecar) }}
initContainers:
{{ if ne (annotation .ObjectMeta `sidecar.istio.io/interceptionMode` .ProxyConfig.InterceptionMode) `NONE` }}
{{ if .Values.istio_cni.enabled -}}
Expand Down Expand Up @@ -223,6 +225,17 @@ spec:
command:
- pilot-agent
- wait
{{- else if $nativeSidecar }}
{{- /* preStop is called when the pod starts shutdown. Initialize drain. We will get SIGTERM once applications are torn down. */}}
lifecycle:
preStop:
exec:
command:
- pilot-agent
- request
- --debug-port={{(annotation .ObjectMeta `status.sidecar.istio.io/port` .Values.global.proxy.statusPort)}}
- POST
- drain
{{- end }}
env:
{{- if eq (env "PILOT_ENABLE_INBOUND_PASSTHROUGH" "true") "false" }}
Expand Down
15 changes: 14 additions & 1 deletion manifests/charts/istiod-remote/files/injection-template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,9 @@ metadata:
{{- end }}
}
spec:
{{- $holdProxy := or .ProxyConfig.HoldApplicationUntilProxyStarts.GetValue .Values.global.proxy.holdApplicationUntilProxyStarts }}
{{- $holdProxy := and
(or .ProxyConfig.HoldApplicationUntilProxyStarts.GetValue .Values.global.proxy.holdApplicationUntilProxyStarts)
(not $nativeSidecar) }}
initContainers:
{{ if ne (annotation .ObjectMeta `sidecar.istio.io/interceptionMode` .ProxyConfig.InterceptionMode) `NONE` }}
{{ if .Values.istio_cni.enabled -}}
Expand Down Expand Up @@ -223,6 +225,17 @@ spec:
command:
- pilot-agent
- wait
{{- else if $nativeSidecar }}
{{- /* preStop is called when the pod starts shutdown. Initialize drain. We will get SIGTERM once applications are torn down. */}}
lifecycle:
preStop:
exec:
command:
- pilot-agent
- request
- --debug-port={{(annotation .ObjectMeta `status.sidecar.istio.io/port` .Values.global.proxy.statusPort)}}
- POST
- drain
{{- end }}
env:
{{- if eq (env "PILOT_ENABLE_INBOUND_PASSTHROUGH" "true") "false" }}
Expand Down
13 changes: 10 additions & 3 deletions pilot/cmd/pilot-agent/app/cmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ func newProxyCommand() *cobra.Command {
// If a status port was provided, start handling status probes.
if proxyConfig.StatusPort > 0 {
if err := initStatusServer(ctx, proxy, proxyConfig,
agentOptions.EnvoyPrometheusPort, proxyArgs.EnableProfiling, agent); err != nil {
agentOptions.EnvoyPrometheusPort, proxyArgs.EnableProfiling, agent, cancel); err != nil {
return err
}
}
Expand Down Expand Up @@ -214,13 +214,20 @@ func addFlags(proxyCmd *cobra.Command) {
"Enable profiling via web interface host:port/debug/pprof/.")
}

func initStatusServer(ctx context.Context, proxy *model.Proxy, proxyConfig *meshconfig.ProxyConfig,
envoyPrometheusPort int, enableProfiling bool, agent *istio_agent.Agent,
func initStatusServer(
ctx context.Context,
proxy *model.Proxy,
proxyConfig *meshconfig.ProxyConfig,
envoyPrometheusPort int,
enableProfiling bool,
agent *istio_agent.Agent,
shutdown context.CancelFunc,
) error {
o := options.NewStatusServerOptions(proxy, proxyConfig, agent)
o.EnvoyPrometheusPort = envoyPrometheusPort
o.EnableProfiling = enableProfiling
o.Context = ctx
o.Shutdown = shutdown
statusServer, err := status.NewServer(*o)
if err != nil {
return err
Expand Down
3 changes: 3 additions & 0 deletions pilot/cmd/pilot-agent/options/statusserver.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,5 +34,8 @@ func NewStatusServerOptions(proxy *model.Proxy, proxyConfig *meshconfig.ProxyCon
NoEnvoy: agent.EnvoyDisabled(),
FetchDNS: agent.GetDNSTable,
GRPCBootstrap: agent.GRPCBootstrapPath(),
TriggerDrain: func() {
agent.DrainNow()
},
}
}
34 changes: 31 additions & 3 deletions pilot/cmd/pilot-agent/status/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,14 +54,16 @@ import (
"istio.io/istio/pkg/kube/apimirror"
"istio.io/istio/pkg/log"
"istio.io/istio/pkg/monitoring"
"istio.io/istio/pkg/network"
"istio.io/istio/pkg/slices"
)

const (
// readyPath is for the pilot agent readiness itself.
readyPath = "/healthz/ready"
// quitPath is to notify the pilot agent to quit.
quitPath = "/quitquitquit"
quitPath = "/quitquitquit"
drainPath = "/drain"
// KubeAppProberEnvName is the name of the command line flag for pilot agent to pass app prober config.
// The json encoded string to pass app HTTP probe information from injector(istioctl or webhook).
// For example, ISTIO_KUBE_APP_PROBERS='{"/app-health/httpbin/livez":{"httpGet":{"path": "/hello", "port": 8080}}.
Expand Down Expand Up @@ -128,6 +130,8 @@ type Options struct {
EnableProfiling bool
// PrometheusRegistry to use. Just for testing.
PrometheusRegistry prometheus.Gatherer
Shutdown context.CancelFunc
TriggerDrain func()
}

// Server provides an endpoint for handling status probes.
Expand All @@ -147,6 +151,8 @@ type Server struct {
http *http.Client
enableProfiling bool
registry prometheus.Gatherer
shutdown context.CancelFunc
drain func()
}

func initializeMonitoring() (prometheus.Gatherer, error) {
Expand Down Expand Up @@ -212,6 +218,10 @@ func NewServer(config Options) (*Server, error) {
config: config,
enableProfiling: config.EnableProfiling,
registry: registry,
shutdown: func() {
config.Shutdown()
},
drain: config.TriggerDrain,
}
if LegacyLocalhostProbeDestination.Get() {
s.appProbersDestination = "localhost"
Expand Down Expand Up @@ -358,6 +368,7 @@ func (s *Server) Run(ctx context.Context) {
// Keep for backward compat with configs.
mux.HandleFunc(`/stats/prometheus`, s.handleStats)
mux.HandleFunc(quitPath, s.handleQuit)
mux.HandleFunc(drainPath, s.handleDrain)
mux.HandleFunc("/app-health/", s.handleAppProbe)

if s.enableProfiling {
Expand Down Expand Up @@ -387,7 +398,9 @@ func (s *Server) Run(ctx context.Context) {

go func() {
if err := http.Serve(l, mux); err != nil {
log.Error(err)
if network.IsUnexpectedListenerError(err) {
log.Error(err)
}
select {
case <-ctx.Done():
// We are shutting down already, don't trigger SIGTERM
Expand Down Expand Up @@ -671,7 +684,22 @@ func (s *Server) handleQuit(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
_, _ = w.Write([]byte("OK"))
log.Infof("handling %s, notifying pilot-agent to exit", quitPath)
notifyExit()
s.shutdown()
}

func (s *Server) handleDrain(w http.ResponseWriter, r *http.Request) {
if !isRequestFromLocalhost(r) {
http.Error(w, "Only requests from localhost are allowed", http.StatusForbidden)
return
}
if r.Method != http.MethodPost {
http.Error(w, "Method Not Allowed", http.StatusMethodNotAllowed)
return
}
w.WriteHeader(http.StatusOK)
_, _ = w.Write([]byte("OK"))
log.Infof("handling %s, starting drain", drainPath)
s.drain()
}

func (s *Server) handleAppProbe(w http.ResponseWriter, req *http.Request) {
Expand Down
Loading

0 comments on commit 3639a4f

Please sign in to comment.