-
Notifications
You must be signed in to change notification settings - Fork 40k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Implement IPVS-based in-cluster service load balancing #46580
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -58,11 +58,13 @@ import ( | |
proxyconfig "k8s.io/kubernetes/pkg/proxy/config" | ||
"k8s.io/kubernetes/pkg/proxy/healthcheck" | ||
"k8s.io/kubernetes/pkg/proxy/iptables" | ||
"k8s.io/kubernetes/pkg/proxy/ipvs" | ||
"k8s.io/kubernetes/pkg/proxy/userspace" | ||
"k8s.io/kubernetes/pkg/proxy/winuserspace" | ||
"k8s.io/kubernetes/pkg/util/configz" | ||
utildbus "k8s.io/kubernetes/pkg/util/dbus" | ||
utiliptables "k8s.io/kubernetes/pkg/util/iptables" | ||
utilipvs "k8s.io/kubernetes/pkg/util/ipvs" | ||
utilnetsh "k8s.io/kubernetes/pkg/util/netsh" | ||
utilnode "k8s.io/kubernetes/pkg/util/node" | ||
"k8s.io/kubernetes/pkg/util/oom" | ||
|
@@ -76,17 +78,19 @@ import ( | |
"github.com/prometheus/client_golang/prometheus" | ||
"github.com/spf13/cobra" | ||
"github.com/spf13/pflag" | ||
"k8s.io/kubernetes/pkg/features" | ||
) | ||
|
||
// Names of the proxy modes that checkKnownProxyMode accepts (in addition to the empty string). | ||
const ( | ||
proxyModeUserspace = "userspace" | ||
proxyModeIPTables = "iptables" | ||
proxyModeIPVS = "ipvs" | ||
) | ||
|
||
// checkKnownProxyMode returns true if proxyMode is valid. | ||
func checkKnownProxyMode(proxyMode string) bool { | ||
switch proxyMode { | ||
case "", proxyModeUserspace, proxyModeIPTables: | ||
case "", proxyModeUserspace, proxyModeIPTables, proxyModeIPVS: | ||
return true | ||
} | ||
return false | ||
|
@@ -122,7 +126,8 @@ type Options struct { | |
func AddFlags(options *Options, fs *pflag.FlagSet) { | ||
fs.StringVar(&options.ConfigFile, "config", options.ConfigFile, "The path to the configuration file.") | ||
fs.StringVar(&options.WriteConfigTo, "write-config-to", options.WriteConfigTo, "If set, write the default configuration values to this file and exit.") | ||
fs.BoolVar(&options.CleanupAndExit, "cleanup-iptables", options.CleanupAndExit, "If true, cleanup iptables rules and exit.") | ||
fs.MarkDeprecated("cleanup-iptables", "This flag is replaced by cleanup-proxyrules.") | ||
fs.BoolVar(&options.CleanupAndExit, "cleanup", options.CleanupAndExit, "If true cleanup iptables and ipvs rules and exit.") | ||
|
||
// All flags below here are deprecated and will eventually be removed. | ||
|
||
|
@@ -137,10 +142,12 @@ func AddFlags(options *Options, fs *pflag.FlagSet) { | |
fs.StringVar(&options.config.ClientConnection.KubeConfigFile, "kubeconfig", options.config.ClientConnection.KubeConfigFile, "Path to kubeconfig file with authorization information (the master location is set by the master flag).") | ||
fs.Var(componentconfig.PortRangeVar{Val: &options.config.PortRange}, "proxy-port-range", "Range of host ports (beginPort-endPort, inclusive) that may be consumed in order to proxy service traffic. If unspecified (0-0) then ports will be randomly chosen.") | ||
fs.StringVar(&options.config.HostnameOverride, "hostname-override", options.config.HostnameOverride, "If non-empty, will use this string as identification instead of the actual hostname.") | ||
fs.Var(&options.config.Mode, "proxy-mode", "Which proxy mode to use: 'userspace' (older) or 'iptables' (faster). If blank, use the best-available proxy (currently iptables). If the iptables proxy is selected, regardless of how, but the system's kernel or iptables versions are insufficient, this always falls back to the userspace proxy.") | ||
fs.Var(&options.config.Mode, "proxy-mode", "Which proxy mode to use: 'userspace' (older) or 'iptables' (faster) or 'ipvs'(experimental). If blank, use the best-available proxy (currently iptables). If the iptables proxy is selected, regardless of how, but the system's kernel or iptables versions are insufficient, this always falls back to the userspace proxy.") | ||
fs.Int32Var(options.config.IPTables.MasqueradeBit, "iptables-masquerade-bit", utilpointer.Int32PtrDerefOr(options.config.IPTables.MasqueradeBit, 14), "If using the pure iptables proxy, the bit of the fwmark space to mark packets requiring SNAT with. Must be within the range [0, 31].") | ||
fs.DurationVar(&options.config.IPTables.SyncPeriod.Duration, "iptables-sync-period", options.config.IPTables.SyncPeriod.Duration, "The maximum interval of how often iptables rules are refreshed (e.g. '5s', '1m', '2h22m'). Must be greater than 0.") | ||
fs.DurationVar(&options.config.IPTables.MinSyncPeriod.Duration, "iptables-min-sync-period", options.config.IPTables.MinSyncPeriod.Duration, "The minimum interval of how often the iptables rules can be refreshed as endpoints and services change (e.g. '5s', '1m', '2h22m').") | ||
fs.DurationVar(&options.config.IPVS.SyncPeriod.Duration, "ipvs-sync-period", options.config.IPVS.SyncPeriod.Duration, "The maximum interval of how often ipvs rules are refreshed (e.g. '5s', '1m', '2h22m'). Must be greater than 0.") | ||
fs.DurationVar(&options.config.IPVS.MinSyncPeriod.Duration, "ipvs-min-sync-period", options.config.IPVS.MinSyncPeriod.Duration, "The minimum interval of how often the ipvs rules can be refreshed as endpoints and services change (e.g. '5s', '1m', '2h22m').") | ||
fs.DurationVar(&options.config.ConfigSyncPeriod.Duration, "config-sync-period", options.config.ConfigSyncPeriod.Duration, "How often configuration from the apiserver is refreshed. Must be greater than 0.") | ||
fs.BoolVar(&options.config.IPTables.MasqueradeAll, "masquerade-all", options.config.IPTables.MasqueradeAll, "If using the pure iptables proxy, SNAT everything (this not commonly needed)") | ||
fs.StringVar(&options.config.ClusterCIDR, "cluster-cidr", options.config.ClusterCIDR, "The CIDR range of pods in the cluster. It is used to bridge traffic coming from outside of the cluster. If not provided, no off-cluster bridging will be performed.") | ||
|
@@ -161,7 +168,7 @@ func AddFlags(options *Options, fs *pflag.FlagSet) { | |
options.config.Conntrack.TCPCloseWaitTimeout.Duration, | ||
"NAT timeout for TCP connections in the CLOSE_WAIT state") | ||
fs.BoolVar(&options.config.EnableProfiling, "profiling", options.config.EnableProfiling, "If true enables profiling via web interface on /debug/pprof handler.") | ||
|
||
fs.StringVar(&options.config.IPVS.Scheduler, "ipvs-scheduler", options.config.IPVS.Scheduler, "The ipvs scheduler type when proxy mode is ipvs") | ||
utilfeature.DefaultFeatureGate.AddFlag(fs) | ||
} | ||
|
||
|
@@ -187,7 +194,7 @@ func NewOptions() (*Options, error) { | |
// Complete completes all the required options. | ||
func (o *Options) Complete() error { | ||
if len(o.ConfigFile) == 0 && len(o.WriteConfigTo) == 0 { | ||
glog.Warning("WARNING: all flags other than --config, --write-config-to, and --cleanup-iptables are deprecated. Please begin using a config file ASAP.") | ||
glog.Warning("WARNING: all flags other than --config, --write-config-to, and --cleanup are deprecated. Please begin using a config file ASAP.") | ||
o.applyDeprecatedHealthzPortToConfig() | ||
} | ||
|
||
|
@@ -363,6 +370,8 @@ type ProxyServer struct { | |
Client clientset.Interface | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This struct probably needs to be decomposed into smaller pieces rather than being the union of all proxy modes. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, I have actually considered the same thing before, but it would introduce more changes to the iptables & userspace proxies. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should we fix it in this PR or in a later PR? |
||
EventClient v1core.EventsGetter | ||
IptInterface utiliptables.Interface | ||
IpvsInterface utilipvs.Interface | ||
execer exec.Interface | ||
Proxier proxy.ProxyProvider | ||
Broadcaster record.EventBroadcaster | ||
Recorder record.EventRecorder | ||
|
@@ -435,6 +444,7 @@ func NewProxyServer(config *componentconfig.KubeProxyConfiguration, cleanupAndEx | |
|
||
var netshInterface utilnetsh.Interface | ||
var iptInterface utiliptables.Interface | ||
var ipvsInterface utilipvs.Interface | ||
var dbus utildbus.Interface | ||
|
||
// Create a iptables utils. | ||
|
@@ -445,11 +455,12 @@ func NewProxyServer(config *componentconfig.KubeProxyConfiguration, cleanupAndEx | |
} else { | ||
dbus = utildbus.New() | ||
iptInterface = utiliptables.New(execer, dbus, protocol) | ||
ipvsInterface = utilipvs.New(execer) | ||
} | ||
|
||
// We omit creation of pretty much everything if we run in cleanup mode | ||
if cleanupAndExit { | ||
return &ProxyServer{IptInterface: iptInterface, CleanupAndExit: cleanupAndExit}, nil | ||
return &ProxyServer{IptInterface: iptInterface, IpvsInterface: ipvsInterface, CleanupAndExit: cleanupAndExit}, nil | ||
} | ||
|
||
client, eventClient, err := createClients(config.ClientConnection, master) | ||
|
@@ -517,9 +528,40 @@ func NewProxyServer(config *componentconfig.KubeProxyConfiguration, cleanupAndEx | |
serviceEventHandler = proxierIPTables | ||
endpointsEventHandler = proxierIPTables | ||
// No turning back. Remove artifacts that might still exist from the userspace Proxier. | ||
glog.V(0).Info("Tearing down userspace rules.") | ||
glog.V(0).Info("Tearing down inactive rules.") | ||
// TODO this has side effects that should only happen when Run() is invoked. | ||
userspace.CleanupLeftovers(iptInterface) | ||
// IPVS Proxier will generate some iptables rules, | ||
// need to clean them before switching to other proxy mode. | ||
ipvs.CleanupLeftovers(execer, ipvsInterface, iptInterface) | ||
} else if proxyMode == proxyModeIPVS { | ||
glog.V(0).Info("Using ipvs Proxier.") | ||
proxierIPVS, err := ipvs.NewProxier( | ||
iptInterface, | ||
ipvsInterface, | ||
utilsysctl.New(), | ||
execer, | ||
config.IPVS.SyncPeriod.Duration, | ||
config.IPVS.MinSyncPeriod.Duration, | ||
config.IPTables.MasqueradeAll, | ||
int(*config.IPTables.MasqueradeBit), | ||
config.ClusterCIDR, | ||
hostname, | ||
getNodeIP(client, hostname), | ||
recorder, | ||
healthzServer, | ||
config.IPVS.Scheduler, | ||
) | ||
if err != nil { | ||
return nil, fmt.Errorf("unable to create proxier: %v", err) | ||
} | ||
proxier = proxierIPVS | ||
serviceEventHandler = proxierIPVS | ||
endpointsEventHandler = proxierIPVS | ||
glog.V(0).Info("Tearing down inactive rules.") | ||
// TODO this has side effects that should only happen when Run() is invoked. | ||
userspace.CleanupLeftovers(iptInterface) | ||
iptables.CleanupLeftovers(iptInterface) | ||
} else { | ||
glog.V(0).Info("Using userspace Proxier.") | ||
if goruntime.GOOS == "windows" { | ||
|
@@ -566,11 +608,14 @@ func NewProxyServer(config *componentconfig.KubeProxyConfiguration, cleanupAndEx | |
serviceEventHandler = proxierUserspace | ||
proxier = proxierUserspace | ||
} | ||
// Remove artifacts from the pure-iptables Proxier, if not on Windows. | ||
// Remove artifacts from the iptables and ipvs Proxier, if not on Windows. | ||
if goruntime.GOOS != "windows" { | ||
glog.V(0).Info("Tearing down pure-iptables proxy rules.") | ||
glog.V(0).Info("Tearing down inactive rules.") | ||
// TODO this has side effects that should only happen when Run() is invoked. | ||
iptables.CleanupLeftovers(iptInterface) | ||
// IPVS Proxier will generate some iptables rules, | ||
// need to clean them before switching to other proxy mode. | ||
ipvs.CleanupLeftovers(execer, ipvsInterface, iptInterface) | ||
} | ||
} | ||
|
||
|
@@ -583,6 +628,8 @@ func NewProxyServer(config *componentconfig.KubeProxyConfiguration, cleanupAndEx | |
Client: client, | ||
EventClient: eventClient, | ||
IptInterface: iptInterface, | ||
IpvsInterface: ipvsInterface, | ||
execer: execer, | ||
Proxier: proxier, | ||
Broadcaster: eventBroadcaster, | ||
Recorder: recorder, | ||
|
@@ -607,6 +654,7 @@ func (s *ProxyServer) Run() error { | |
if s.CleanupAndExit { | ||
encounteredError := userspace.CleanupLeftovers(s.IptInterface) | ||
encounteredError = iptables.CleanupLeftovers(s.IptInterface) || encounteredError | ||
encounteredError = ipvs.CleanupLeftovers(s.execer, s.IpvsInterface, s.IptInterface) || encounteredError | ||
if encounteredError { | ||
return errors.New("encountered an error while tearing down rules.") | ||
} | ||
|
@@ -754,10 +802,38 @@ func getProxyMode(proxyMode string, iptver iptables.IPTablesVersioner, kcompat i | |
return proxyModeUserspace | ||
} | ||
|
||
if len(proxyMode) > 0 && proxyMode != proxyModeIPTables { | ||
glog.Warningf("Flag proxy-mode=%q unknown, assuming iptables proxy", proxyMode) | ||
if len(proxyMode) > 0 && proxyMode == proxyModeIPTables { | ||
return tryIPTablesProxy(iptver, kcompat) | ||
} | ||
|
||
if utilfeature.DefaultFeatureGate.Enabled(features.SupportIPVSProxyMode) { | ||
if proxyMode == proxyModeIPVS { | ||
return tryIPVSProxy(iptver, kcompat) | ||
} else { | ||
glog.Warningf("Can't use ipvs proxier, trying iptables proxier") | ||
return tryIPTablesProxy(iptver, kcompat) | ||
} | ||
} | ||
glog.Warningf("Flag proxy-mode=%q unknown, assuming iptables proxy", proxyMode) | ||
return tryIPTablesProxy(iptver, kcompat) | ||
} | ||
|
||
func tryIPVSProxy(iptver iptables.IPTablesVersioner, kcompat iptables.KernelCompatTester) string { | ||
// guaranteed false on error, error only necessary for debugging | ||
// IPVS Proxier relies on iptables | ||
useIPVSProxy, err := ipvs.CanUseIPVSProxier() | ||
if err != nil { | ||
utilruntime.HandleError(fmt.Errorf("can't determine whether to use ipvs proxy, using userspace proxier: %v", err)) | ||
return proxyModeUserspace | ||
} | ||
if useIPVSProxy { | ||
return proxyModeIPVS | ||
} | ||
|
||
// TODO: Check ipvs version | ||
|
||
// Try to fallback to iptables before falling back to userspace | ||
glog.V(1).Infof("Can't use ipvs proxier, trying iptables proxier") | ||
return tryIPTablesProxy(iptver, kcompat) | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add this to --proxy-mode's comment?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Okay, will do that.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed.