Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement IPVS-based in-cluster service load balancing #46580

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions Godeps/Godeps.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

410 changes: 410 additions & 0 deletions Godeps/LICENSES

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion build/debian-iptables/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,5 @@ CROSS_BUILD_COPY qemu-ARCH-static /usr/bin/
RUN clean-install \
iptables \
ebtables \
conntrack
conntrack \
module-init-tools
3 changes: 3 additions & 0 deletions cmd/kube-proxy/app/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,21 @@ go_library(
"//pkg/apis/componentconfig/v1alpha1:go_default_library",
"//pkg/client/clientset_generated/internalclientset:go_default_library",
"//pkg/client/informers/informers_generated/internalversion:go_default_library",
"//pkg/features:go_default_library",
"//pkg/kubectl/cmd/util:go_default_library",
"//pkg/kubelet/qos:go_default_library",
"//pkg/master/ports:go_default_library",
"//pkg/proxy:go_default_library",
"//pkg/proxy/config:go_default_library",
"//pkg/proxy/healthcheck:go_default_library",
"//pkg/proxy/iptables:go_default_library",
"//pkg/proxy/ipvs:go_default_library",
"//pkg/proxy/userspace:go_default_library",
"//pkg/proxy/winuserspace:go_default_library",
"//pkg/util/configz:go_default_library",
"//pkg/util/dbus:go_default_library",
"//pkg/util/iptables:go_default_library",
"//pkg/util/ipvs:go_default_library",
"//pkg/util/mount:go_default_library",
"//pkg/util/netsh:go_default_library",
"//pkg/util/node:go_default_library",
Expand Down
98 changes: 87 additions & 11 deletions cmd/kube-proxy/app/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,11 +58,13 @@ import (
proxyconfig "k8s.io/kubernetes/pkg/proxy/config"
"k8s.io/kubernetes/pkg/proxy/healthcheck"
"k8s.io/kubernetes/pkg/proxy/iptables"
"k8s.io/kubernetes/pkg/proxy/ipvs"
"k8s.io/kubernetes/pkg/proxy/userspace"
"k8s.io/kubernetes/pkg/proxy/winuserspace"
"k8s.io/kubernetes/pkg/util/configz"
utildbus "k8s.io/kubernetes/pkg/util/dbus"
utiliptables "k8s.io/kubernetes/pkg/util/iptables"
utilipvs "k8s.io/kubernetes/pkg/util/ipvs"
utilnetsh "k8s.io/kubernetes/pkg/util/netsh"
utilnode "k8s.io/kubernetes/pkg/util/node"
"k8s.io/kubernetes/pkg/util/oom"
Expand All @@ -76,17 +78,19 @@ import (
"github.com/prometheus/client_golang/prometheus"
"github.com/spf13/cobra"
"github.com/spf13/pflag"
"k8s.io/kubernetes/pkg/features"
)

const (
proxyModeUserspace = "userspace"
proxyModeIPTables = "iptables"
proxyModeIPVS = "ipvs"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add this to --proxy-mode 's comment?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, will do that.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed.

)

// checkKnownProxyMode returns true if proxyMode is valid.
func checkKnownProxyMode(proxyMode string) bool {
switch proxyMode {
case "", proxyModeUserspace, proxyModeIPTables:
case "", proxyModeUserspace, proxyModeIPTables, proxyModeIPVS:
return true
}
return false
Expand Down Expand Up @@ -122,7 +126,8 @@ type Options struct {
func AddFlags(options *Options, fs *pflag.FlagSet) {
fs.StringVar(&options.ConfigFile, "config", options.ConfigFile, "The path to the configuration file.")
fs.StringVar(&options.WriteConfigTo, "write-config-to", options.WriteConfigTo, "If set, write the default configuration values to this file and exit.")
fs.BoolVar(&options.CleanupAndExit, "cleanup-iptables", options.CleanupAndExit, "If true, cleanup iptables rules and exit.")
fs.MarkDeprecated("cleanup-iptables", "This flag is replaced by cleanup-proxyrules.")
fs.BoolVar(&options.CleanupAndExit, "cleanup", options.CleanupAndExit, "If true cleanup iptables and ipvs rules and exit.")

// All flags below here are deprecated and will eventually be removed.

Expand All @@ -137,10 +142,12 @@ func AddFlags(options *Options, fs *pflag.FlagSet) {
fs.StringVar(&options.config.ClientConnection.KubeConfigFile, "kubeconfig", options.config.ClientConnection.KubeConfigFile, "Path to kubeconfig file with authorization information (the master location is set by the master flag).")
fs.Var(componentconfig.PortRangeVar{Val: &options.config.PortRange}, "proxy-port-range", "Range of host ports (beginPort-endPort, inclusive) that may be consumed in order to proxy service traffic. If unspecified (0-0) then ports will be randomly chosen.")
fs.StringVar(&options.config.HostnameOverride, "hostname-override", options.config.HostnameOverride, "If non-empty, will use this string as identification instead of the actual hostname.")
fs.Var(&options.config.Mode, "proxy-mode", "Which proxy mode to use: 'userspace' (older) or 'iptables' (faster). If blank, use the best-available proxy (currently iptables). If the iptables proxy is selected, regardless of how, but the system's kernel or iptables versions are insufficient, this always falls back to the userspace proxy.")
fs.Var(&options.config.Mode, "proxy-mode", "Which proxy mode to use: 'userspace' (older) or 'iptables' (faster) or 'ipvs'(experimental). If blank, use the best-available proxy (currently iptables). If the iptables proxy is selected, regardless of how, but the system's kernel or iptables versions are insufficient, this always falls back to the userspace proxy.")
fs.Int32Var(options.config.IPTables.MasqueradeBit, "iptables-masquerade-bit", utilpointer.Int32PtrDerefOr(options.config.IPTables.MasqueradeBit, 14), "If using the pure iptables proxy, the bit of the fwmark space to mark packets requiring SNAT with. Must be within the range [0, 31].")
fs.DurationVar(&options.config.IPTables.SyncPeriod.Duration, "iptables-sync-period", options.config.IPTables.SyncPeriod.Duration, "The maximum interval of how often iptables rules are refreshed (e.g. '5s', '1m', '2h22m'). Must be greater than 0.")
fs.DurationVar(&options.config.IPTables.MinSyncPeriod.Duration, "iptables-min-sync-period", options.config.IPTables.MinSyncPeriod.Duration, "The minimum interval of how often the iptables rules can be refreshed as endpoints and services change (e.g. '5s', '1m', '2h22m').")
fs.DurationVar(&options.config.IPVS.SyncPeriod.Duration, "ipvs-sync-period", options.config.IPVS.SyncPeriod.Duration, "The maximum interval of how often ipvs rules are refreshed (e.g. '5s', '1m', '2h22m'). Must be greater than 0.")
fs.DurationVar(&options.config.IPVS.MinSyncPeriod.Duration, "ipvs-min-sync-period", options.config.IPVS.MinSyncPeriod.Duration, "The minimum interval of how often the ipvs rules can be refreshed as endpoints and services change (e.g. '5s', '1m', '2h22m').")
fs.DurationVar(&options.config.ConfigSyncPeriod.Duration, "config-sync-period", options.config.ConfigSyncPeriod.Duration, "How often configuration from the apiserver is refreshed. Must be greater than 0.")
fs.BoolVar(&options.config.IPTables.MasqueradeAll, "masquerade-all", options.config.IPTables.MasqueradeAll, "If using the pure iptables proxy, SNAT everything (this not commonly needed)")
fs.StringVar(&options.config.ClusterCIDR, "cluster-cidr", options.config.ClusterCIDR, "The CIDR range of pods in the cluster. It is used to bridge traffic coming from outside of the cluster. If not provided, no off-cluster bridging will be performed.")
Expand All @@ -161,7 +168,7 @@ func AddFlags(options *Options, fs *pflag.FlagSet) {
options.config.Conntrack.TCPCloseWaitTimeout.Duration,
"NAT timeout for TCP connections in the CLOSE_WAIT state")
fs.BoolVar(&options.config.EnableProfiling, "profiling", options.config.EnableProfiling, "If true enables profiling via web interface on /debug/pprof handler.")

fs.StringVar(&options.config.IPVS.Scheduler, "ipvs-scheduler", options.config.IPVS.Scheduler, "The ipvs scheduler type when proxy mode is ipvs")
utilfeature.DefaultFeatureGate.AddFlag(fs)
}

Expand All @@ -187,7 +194,7 @@ func NewOptions() (*Options, error) {
// Complete completes all the required options.
func (o *Options) Complete() error {
if len(o.ConfigFile) == 0 && len(o.WriteConfigTo) == 0 {
glog.Warning("WARNING: all flags other than --config, --write-config-to, and --cleanup-iptables are deprecated. Please begin using a config file ASAP.")
glog.Warning("WARNING: all flags other than --config, --write-config-to, and --cleanup are deprecated. Please begin using a config file ASAP.")
o.applyDeprecatedHealthzPortToConfig()
}

Expand Down Expand Up @@ -363,6 +370,8 @@ type ProxyServer struct {
Client clientset.Interface
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This struct probably needs to be decomposed into smaller pieces rather than the union of all proxy modes.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, actually I have considered the same thing before. But, it will introduce more changes on iptables & userspace proxy.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we fix it in this PR or another PR later?

EventClient v1core.EventsGetter
IptInterface utiliptables.Interface
IpvsInterface utilipvs.Interface
execer exec.Interface
Proxier proxy.ProxyProvider
Broadcaster record.EventBroadcaster
Recorder record.EventRecorder
Expand Down Expand Up @@ -435,6 +444,7 @@ func NewProxyServer(config *componentconfig.KubeProxyConfiguration, cleanupAndEx

var netshInterface utilnetsh.Interface
var iptInterface utiliptables.Interface
var ipvsInterface utilipvs.Interface
var dbus utildbus.Interface

// Create a iptables utils.
Expand All @@ -445,11 +455,12 @@ func NewProxyServer(config *componentconfig.KubeProxyConfiguration, cleanupAndEx
} else {
dbus = utildbus.New()
iptInterface = utiliptables.New(execer, dbus, protocol)
ipvsInterface = utilipvs.New(execer)
}

// We omit creation of pretty much everything if we run in cleanup mode
if cleanupAndExit {
return &ProxyServer{IptInterface: iptInterface, CleanupAndExit: cleanupAndExit}, nil
return &ProxyServer{IptInterface: iptInterface, IpvsInterface: ipvsInterface, CleanupAndExit: cleanupAndExit}, nil
}

client, eventClient, err := createClients(config.ClientConnection, master)
Expand Down Expand Up @@ -517,9 +528,40 @@ func NewProxyServer(config *componentconfig.KubeProxyConfiguration, cleanupAndEx
serviceEventHandler = proxierIPTables
endpointsEventHandler = proxierIPTables
// No turning back. Remove artifacts that might still exist from the userspace Proxier.
glog.V(0).Info("Tearing down userspace rules.")
glog.V(0).Info("Tearing down inactive rules.")
// TODO this has side effects that should only happen when Run() is invoked.
userspace.CleanupLeftovers(iptInterface)
// IPVS Proxier will generate some iptables rules,
// need to clean them before switching to other proxy mode.
ipvs.CleanupLeftovers(execer, ipvsInterface, iptInterface)
} else if proxyMode == proxyModeIPVS {
glog.V(0).Info("Using ipvs Proxier.")
proxierIPVS, err := ipvs.NewProxier(
iptInterface,
ipvsInterface,
utilsysctl.New(),
execer,
config.IPVS.SyncPeriod.Duration,
config.IPVS.MinSyncPeriod.Duration,
config.IPTables.MasqueradeAll,
int(*config.IPTables.MasqueradeBit),
config.ClusterCIDR,
hostname,
getNodeIP(client, hostname),
recorder,
healthzServer,
config.IPVS.Scheduler,
)
if err != nil {
return nil, fmt.Errorf("unable to create proxier: %v", err)
}
proxier = proxierIPVS
serviceEventHandler = proxierIPVS
endpointsEventHandler = proxierIPVS
glog.V(0).Info("Tearing down inactive rules.")
// TODO this has side effects that should only happen when Run() is invoked.
userspace.CleanupLeftovers(iptInterface)
iptables.CleanupLeftovers(iptInterface)
} else {
glog.V(0).Info("Using userspace Proxier.")
if goruntime.GOOS == "windows" {
Expand Down Expand Up @@ -566,11 +608,14 @@ func NewProxyServer(config *componentconfig.KubeProxyConfiguration, cleanupAndEx
serviceEventHandler = proxierUserspace
proxier = proxierUserspace
}
// Remove artifacts from the pure-iptables Proxier, if not on Windows.
// Remove artifacts from the iptables and ipvs Proxier, if not on Windows.
if goruntime.GOOS != "windows" {
glog.V(0).Info("Tearing down pure-iptables proxy rules.")
glog.V(0).Info("Tearing down inactive rules.")
// TODO this has side effects that should only happen when Run() is invoked.
iptables.CleanupLeftovers(iptInterface)
// IPVS Proxier will generate some iptables rules,
// need to clean them before switching to other proxy mode.
ipvs.CleanupLeftovers(execer, ipvsInterface, iptInterface)
}
}

Expand All @@ -583,6 +628,8 @@ func NewProxyServer(config *componentconfig.KubeProxyConfiguration, cleanupAndEx
Client: client,
EventClient: eventClient,
IptInterface: iptInterface,
IpvsInterface: ipvsInterface,
execer: execer,
Proxier: proxier,
Broadcaster: eventBroadcaster,
Recorder: recorder,
Expand All @@ -607,6 +654,7 @@ func (s *ProxyServer) Run() error {
if s.CleanupAndExit {
encounteredError := userspace.CleanupLeftovers(s.IptInterface)
encounteredError = iptables.CleanupLeftovers(s.IptInterface) || encounteredError
encounteredError = ipvs.CleanupLeftovers(s.execer, s.IpvsInterface, s.IptInterface) || encounteredError
if encounteredError {
return errors.New("encountered an error while tearing down rules.")
}
Expand Down Expand Up @@ -754,10 +802,38 @@ func getProxyMode(proxyMode string, iptver iptables.IPTablesVersioner, kcompat i
return proxyModeUserspace
}

if len(proxyMode) > 0 && proxyMode != proxyModeIPTables {
glog.Warningf("Flag proxy-mode=%q unknown, assuming iptables proxy", proxyMode)
if len(proxyMode) > 0 && proxyMode == proxyModeIPTables {
return tryIPTablesProxy(iptver, kcompat)
}

if utilfeature.DefaultFeatureGate.Enabled(features.SupportIPVSProxyMode) {
if proxyMode == proxyModeIPVS {
return tryIPVSProxy(iptver, kcompat)
} else {
glog.Warningf("Can't use ipvs proxier, trying iptables proxier")
return tryIPTablesProxy(iptver, kcompat)
}
}
glog.Warningf("Flag proxy-mode=%q unknown, assuming iptables proxy", proxyMode)
return tryIPTablesProxy(iptver, kcompat)
}

func tryIPVSProxy(iptver iptables.IPTablesVersioner, kcompat iptables.KernelCompatTester) string {
// guaranteed false on error, error only necessary for debugging
// IPVS Proxier relies on iptables
useIPVSProxy, err := ipvs.CanUseIPVSProxier()
if err != nil {
utilruntime.HandleError(fmt.Errorf("can't determine whether to use ipvs proxy, using userspace proxier: %v", err))
return proxyModeUserspace
}
if useIPVSProxy {
return proxyModeIPVS
}

// TODO: Check ipvs version

// Try to fallback to iptables before falling back to userspace
glog.V(1).Infof("Can't use ipvs proxier, trying iptables proxier")
return tryIPTablesProxy(iptver, kcompat)
}

Expand Down
16 changes: 12 additions & 4 deletions cmd/kube-proxy/app/server_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,9 @@ iptables:
masqueradeBit: 17
minSyncPeriod: 10s
syncPeriod: 60s
ipvs:
minSyncPeriod: 10s
syncPeriod: 60s
kind: KubeProxyConfiguration
metricsBindAddress: "%s"
mode: "iptables"
Expand Down Expand Up @@ -347,12 +350,17 @@ udpTimeoutMilliseconds: 123ms
MinSyncPeriod: metav1.Duration{Duration: 10 * time.Second},
SyncPeriod: metav1.Duration{Duration: 60 * time.Second},
},
IPVS: componentconfig.KubeProxyIPVSConfiguration{
MinSyncPeriod: metav1.Duration{Duration: 10 * time.Second},
SyncPeriod: metav1.Duration{Duration: 60 * time.Second},
},
MetricsBindAddress: tc.metricsBindAddress,
Mode: "iptables",
OOMScoreAdj: utilpointer.Int32Ptr(17),
PortRange: "2-7",
ResourceContainer: "/foo",
UDPIdleTimeout: metav1.Duration{Duration: 123 * time.Millisecond},
// TODO: IPVS
OOMScoreAdj: utilpointer.Int32Ptr(17),
PortRange: "2-7",
ResourceContainer: "/foo",
UDPIdleTimeout: metav1.Duration{Duration: 123 * time.Millisecond},
}

options, err := NewOptions()
Expand Down
15 changes: 15 additions & 0 deletions pkg/apis/componentconfig/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,19 @@ type KubeProxyIPTablesConfiguration struct {
MinSyncPeriod metav1.Duration
}

// KubeProxyIPVSConfiguration contains ipvs-related configuration
// details for the Kubernetes proxy server.
type KubeProxyIPVSConfiguration struct {
// syncPeriod is the period that ipvs rules are refreshed (e.g. '5s', '1m',
// '2h22m'). Must be greater than 0.
SyncPeriod metav1.Duration
// minSyncPeriod is the minimum period that ipvs rules are refreshed (e.g. '5s', '1m',
// '2h22m').
MinSyncPeriod metav1.Duration
// ipvs scheduler
Scheduler string
}

// KubeProxyConntrackConfiguration contains conntrack settings for
// the Kubernetes proxy server.
type KubeProxyConntrackConfiguration struct {
Expand Down Expand Up @@ -112,6 +125,8 @@ type KubeProxyConfiguration struct {
ClientConnection ClientConnectionConfiguration
// iptables contains iptables-related configuration options.
IPTables KubeProxyIPTablesConfiguration
// ipvs contains ipvs-related configuration options.
IPVS KubeProxyIPVSConfiguration
// oomScoreAdj is the oom-score-adj value for kube-proxy process. Values must be within
// the range [-1000, 1000]
OOMScoreAdj *int32
Expand Down
15 changes: 15 additions & 0 deletions pkg/apis/componentconfig/v1alpha1/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,19 @@ type KubeProxyIPTablesConfiguration struct {
MinSyncPeriod metav1.Duration `json:"minSyncPeriod"`
}

// KubeProxyIPVSConfiguration contains ipvs-related configuration
// details for the Kubernetes proxy server.
type KubeProxyIPVSConfiguration struct {
// syncPeriod is the period that ipvs rules are refreshed (e.g. '5s', '1m',
// '2h22m'). Must be greater than 0.
SyncPeriod metav1.Duration `json:"syncPeriod"`
// minSyncPeriod is the minimum period that ipvs rules are refreshed (e.g. '5s', '1m',
// '2h22m').
MinSyncPeriod metav1.Duration `json:"minSyncPeriod"`
// ipvs scheduler
Scheduler string `json:"scheduler"`
}

// KubeProxyConntrackConfiguration contains conntrack settings for
// the Kubernetes proxy server.
type KubeProxyConntrackConfiguration struct {
Expand Down Expand Up @@ -112,6 +125,8 @@ type KubeProxyConfiguration struct {
ClientConnection ClientConnectionConfiguration `json:"clientConnection"`
// iptables contains iptables-related configuration options.
IPTables KubeProxyIPTablesConfiguration `json:"iptables"`
// ipvs contains ipvs-related configuration options.
IPVS KubeProxyIPVSConfiguration `json:"ipvs"`
// oomScoreAdj is the oom-score-adj value for kube-proxy process. Values must be within
// the range [-1000, 1000]
OOMScoreAdj *int32 `json:"oomScoreAdj"`
Expand Down
Loading