Skip to content

Commit

Permalink
implement ipvs mode of kube-proxy
Browse files Browse the repository at this point in the history
Conflicts:
	pkg/util/ipvs/ipvs_unsupported.go
  • Loading branch information
m1093782566 committed Aug 30, 2017
1 parent 09a8532 commit 5ed2b44
Show file tree
Hide file tree
Showing 15 changed files with 3,988 additions and 24 deletions.
3 changes: 2 additions & 1 deletion build/debian-iptables/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,5 @@ CROSS_BUILD_COPY qemu-ARCH-static /usr/bin/
RUN clean-install \
iptables \
ebtables \
conntrack
conntrack \
module-init-tools
3 changes: 3 additions & 0 deletions cmd/kube-proxy/app/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,21 @@ go_library(
"//pkg/apis/componentconfig/v1alpha1:go_default_library",
"//pkg/client/clientset_generated/internalclientset:go_default_library",
"//pkg/client/informers/informers_generated/internalversion:go_default_library",
"//pkg/features:go_default_library",
"//pkg/kubectl/cmd/util:go_default_library",
"//pkg/kubelet/qos:go_default_library",
"//pkg/master/ports:go_default_library",
"//pkg/proxy:go_default_library",
"//pkg/proxy/config:go_default_library",
"//pkg/proxy/healthcheck:go_default_library",
"//pkg/proxy/iptables:go_default_library",
"//pkg/proxy/ipvs:go_default_library",
"//pkg/proxy/userspace:go_default_library",
"//pkg/proxy/winuserspace:go_default_library",
"//pkg/util/configz:go_default_library",
"//pkg/util/dbus:go_default_library",
"//pkg/util/iptables:go_default_library",
"//pkg/util/ipvs:go_default_library",
"//pkg/util/mount:go_default_library",
"//pkg/util/netsh:go_default_library",
"//pkg/util/node:go_default_library",
Expand Down
98 changes: 87 additions & 11 deletions cmd/kube-proxy/app/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,11 +58,13 @@ import (
proxyconfig "k8s.io/kubernetes/pkg/proxy/config"
"k8s.io/kubernetes/pkg/proxy/healthcheck"
"k8s.io/kubernetes/pkg/proxy/iptables"
"k8s.io/kubernetes/pkg/proxy/ipvs"
"k8s.io/kubernetes/pkg/proxy/userspace"
"k8s.io/kubernetes/pkg/proxy/winuserspace"
"k8s.io/kubernetes/pkg/util/configz"
utildbus "k8s.io/kubernetes/pkg/util/dbus"
utiliptables "k8s.io/kubernetes/pkg/util/iptables"
utilipvs "k8s.io/kubernetes/pkg/util/ipvs"
utilnetsh "k8s.io/kubernetes/pkg/util/netsh"
utilnode "k8s.io/kubernetes/pkg/util/node"
"k8s.io/kubernetes/pkg/util/oom"
Expand All @@ -76,17 +78,19 @@ import (
"github.com/prometheus/client_golang/prometheus"
"github.com/spf13/cobra"
"github.com/spf13/pflag"
"k8s.io/kubernetes/pkg/features"
)

const (
proxyModeUserspace = "userspace"
proxyModeIPTables = "iptables"
proxyModeIPVS = "ipvs"
)

// checkKnownProxyMode returns true if proxyMode is valid.
func checkKnownProxyMode(proxyMode string) bool {
switch proxyMode {
case "", proxyModeUserspace, proxyModeIPTables:
case "", proxyModeUserspace, proxyModeIPTables, proxyModeIPVS:
return true
}
return false
Expand Down Expand Up @@ -122,7 +126,8 @@ type Options struct {
func AddFlags(options *Options, fs *pflag.FlagSet) {
fs.StringVar(&options.ConfigFile, "config", options.ConfigFile, "The path to the configuration file.")
fs.StringVar(&options.WriteConfigTo, "write-config-to", options.WriteConfigTo, "If set, write the default configuration values to this file and exit.")
fs.BoolVar(&options.CleanupAndExit, "cleanup-iptables", options.CleanupAndExit, "If true, cleanup iptables rules and exit.")
fs.MarkDeprecated("cleanup-iptables", "This flag is replaced by cleanup-proxyrules.")
fs.BoolVar(&options.CleanupAndExit, "cleanup", options.CleanupAndExit, "If true cleanup iptables and ipvs rules and exit.")

// All flags below here are deprecated and will eventually be removed.

Expand All @@ -137,10 +142,12 @@ func AddFlags(options *Options, fs *pflag.FlagSet) {
fs.StringVar(&options.config.ClientConnection.KubeConfigFile, "kubeconfig", options.config.ClientConnection.KubeConfigFile, "Path to kubeconfig file with authorization information (the master location is set by the master flag).")
fs.Var(componentconfig.PortRangeVar{Val: &options.config.PortRange}, "proxy-port-range", "Range of host ports (beginPort-endPort, inclusive) that may be consumed in order to proxy service traffic. If unspecified (0-0) then ports will be randomly chosen.")
fs.StringVar(&options.config.HostnameOverride, "hostname-override", options.config.HostnameOverride, "If non-empty, will use this string as identification instead of the actual hostname.")
fs.Var(&options.config.Mode, "proxy-mode", "Which proxy mode to use: 'userspace' (older) or 'iptables' (faster). If blank, use the best-available proxy (currently iptables). If the iptables proxy is selected, regardless of how, but the system's kernel or iptables versions are insufficient, this always falls back to the userspace proxy.")
fs.Var(&options.config.Mode, "proxy-mode", "Which proxy mode to use: 'userspace' (older) or 'iptables' (faster) or 'ipvs'(experimental). If blank, use the best-available proxy (currently iptables). If the iptables proxy is selected, regardless of how, but the system's kernel or iptables versions are insufficient, this always falls back to the userspace proxy.")
fs.Int32Var(options.config.IPTables.MasqueradeBit, "iptables-masquerade-bit", utilpointer.Int32PtrDerefOr(options.config.IPTables.MasqueradeBit, 14), "If using the pure iptables proxy, the bit of the fwmark space to mark packets requiring SNAT with. Must be within the range [0, 31].")
fs.DurationVar(&options.config.IPTables.SyncPeriod.Duration, "iptables-sync-period", options.config.IPTables.SyncPeriod.Duration, "The maximum interval of how often iptables rules are refreshed (e.g. '5s', '1m', '2h22m'). Must be greater than 0.")
fs.DurationVar(&options.config.IPTables.MinSyncPeriod.Duration, "iptables-min-sync-period", options.config.IPTables.MinSyncPeriod.Duration, "The minimum interval of how often the iptables rules can be refreshed as endpoints and services change (e.g. '5s', '1m', '2h22m').")
fs.DurationVar(&options.config.IPVS.SyncPeriod.Duration, "ipvs-sync-period", options.config.IPVS.SyncPeriod.Duration, "The maximum interval of how often ipvs rules are refreshed (e.g. '5s', '1m', '2h22m'). Must be greater than 0.")
fs.DurationVar(&options.config.IPVS.MinSyncPeriod.Duration, "ipvs-min-sync-period", options.config.IPVS.MinSyncPeriod.Duration, "The minimum interval of how often the ipvs rules can be refreshed as endpoints and services change (e.g. '5s', '1m', '2h22m').")
fs.DurationVar(&options.config.ConfigSyncPeriod.Duration, "config-sync-period", options.config.ConfigSyncPeriod.Duration, "How often configuration from the apiserver is refreshed. Must be greater than 0.")
fs.BoolVar(&options.config.IPTables.MasqueradeAll, "masquerade-all", options.config.IPTables.MasqueradeAll, "If using the pure iptables proxy, SNAT everything (this not commonly needed)")
fs.StringVar(&options.config.ClusterCIDR, "cluster-cidr", options.config.ClusterCIDR, "The CIDR range of pods in the cluster. It is used to bridge traffic coming from outside of the cluster. If not provided, no off-cluster bridging will be performed.")
Expand All @@ -161,7 +168,7 @@ func AddFlags(options *Options, fs *pflag.FlagSet) {
options.config.Conntrack.TCPCloseWaitTimeout.Duration,
"NAT timeout for TCP connections in the CLOSE_WAIT state")
fs.BoolVar(&options.config.EnableProfiling, "profiling", options.config.EnableProfiling, "If true enables profiling via web interface on /debug/pprof handler.")

fs.StringVar(&options.config.IPVS.Scheduler, "ipvs-scheduler", options.config.IPVS.Scheduler, "The ipvs scheduler type when proxy mode is ipvs")
utilfeature.DefaultFeatureGate.AddFlag(fs)
}

Expand All @@ -187,7 +194,7 @@ func NewOptions() (*Options, error) {
// Complete completes all the required options.
func (o *Options) Complete() error {
if len(o.ConfigFile) == 0 && len(o.WriteConfigTo) == 0 {
glog.Warning("WARNING: all flags other than --config, --write-config-to, and --cleanup-iptables are deprecated. Please begin using a config file ASAP.")
glog.Warning("WARNING: all flags other than --config, --write-config-to, and --cleanup are deprecated. Please begin using a config file ASAP.")
o.applyDeprecatedHealthzPortToConfig()
}

Expand Down Expand Up @@ -363,6 +370,8 @@ type ProxyServer struct {
Client clientset.Interface
EventClient v1core.EventsGetter
IptInterface utiliptables.Interface
IpvsInterface utilipvs.Interface
execer exec.Interface
Proxier proxy.ProxyProvider
Broadcaster record.EventBroadcaster
Recorder record.EventRecorder
Expand Down Expand Up @@ -435,6 +444,7 @@ func NewProxyServer(config *componentconfig.KubeProxyConfiguration, cleanupAndEx

var netshInterface utilnetsh.Interface
var iptInterface utiliptables.Interface
var ipvsInterface utilipvs.Interface
var dbus utildbus.Interface

// Create a iptables utils.
Expand All @@ -445,11 +455,12 @@ func NewProxyServer(config *componentconfig.KubeProxyConfiguration, cleanupAndEx
} else {
dbus = utildbus.New()
iptInterface = utiliptables.New(execer, dbus, protocol)
ipvsInterface = utilipvs.New(execer)
}

// We omit creation of pretty much everything if we run in cleanup mode
if cleanupAndExit {
return &ProxyServer{IptInterface: iptInterface, CleanupAndExit: cleanupAndExit}, nil
return &ProxyServer{IptInterface: iptInterface, IpvsInterface: ipvsInterface, CleanupAndExit: cleanupAndExit}, nil
}

client, eventClient, err := createClients(config.ClientConnection, master)
Expand Down Expand Up @@ -517,9 +528,40 @@ func NewProxyServer(config *componentconfig.KubeProxyConfiguration, cleanupAndEx
serviceEventHandler = proxierIPTables
endpointsEventHandler = proxierIPTables
// No turning back. Remove artifacts that might still exist from the userspace Proxier.
glog.V(0).Info("Tearing down userspace rules.")
glog.V(0).Info("Tearing down inactive rules.")
// TODO this has side effects that should only happen when Run() is invoked.
userspace.CleanupLeftovers(iptInterface)
// IPVS Proxier will generate some iptables rules,
// need to clean them before switching to other proxy mode.
ipvs.CleanupLeftovers(execer, ipvsInterface, iptInterface)
} else if proxyMode == proxyModeIPVS {
glog.V(0).Info("Using ipvs Proxier.")
proxierIPVS, err := ipvs.NewProxier(
iptInterface,
ipvsInterface,
utilsysctl.New(),
execer,
config.IPVS.SyncPeriod.Duration,
config.IPVS.MinSyncPeriod.Duration,
config.IPTables.MasqueradeAll,
int(*config.IPTables.MasqueradeBit),
config.ClusterCIDR,
hostname,
getNodeIP(client, hostname),
recorder,
healthzServer,
config.IPVS.Scheduler,
)
if err != nil {
return nil, fmt.Errorf("unable to create proxier: %v", err)
}
proxier = proxierIPVS
serviceEventHandler = proxierIPVS
endpointsEventHandler = proxierIPVS
glog.V(0).Info("Tearing down inactive rules.")
// TODO this has side effects that should only happen when Run() is invoked.
userspace.CleanupLeftovers(iptInterface)
iptables.CleanupLeftovers(iptInterface)
} else {
glog.V(0).Info("Using userspace Proxier.")
if goruntime.GOOS == "windows" {
Expand Down Expand Up @@ -566,11 +608,14 @@ func NewProxyServer(config *componentconfig.KubeProxyConfiguration, cleanupAndEx
serviceEventHandler = proxierUserspace
proxier = proxierUserspace
}
// Remove artifacts from the pure-iptables Proxier, if not on Windows.
// Remove artifacts from the iptables and ipvs Proxier, if not on Windows.
if goruntime.GOOS != "windows" {
glog.V(0).Info("Tearing down pure-iptables proxy rules.")
glog.V(0).Info("Tearing down inactive rules.")
// TODO this has side effects that should only happen when Run() is invoked.
iptables.CleanupLeftovers(iptInterface)
// IPVS Proxier will generate some iptables rules,
// need to clean them before switching to other proxy mode.
ipvs.CleanupLeftovers(execer, ipvsInterface, iptInterface)
}
}

Expand All @@ -583,6 +628,8 @@ func NewProxyServer(config *componentconfig.KubeProxyConfiguration, cleanupAndEx
Client: client,
EventClient: eventClient,
IptInterface: iptInterface,
IpvsInterface: ipvsInterface,
execer: execer,
Proxier: proxier,
Broadcaster: eventBroadcaster,
Recorder: recorder,
Expand All @@ -607,6 +654,7 @@ func (s *ProxyServer) Run() error {
if s.CleanupAndExit {
encounteredError := userspace.CleanupLeftovers(s.IptInterface)
encounteredError = iptables.CleanupLeftovers(s.IptInterface) || encounteredError
encounteredError = ipvs.CleanupLeftovers(s.execer, s.IpvsInterface, s.IptInterface) || encounteredError
if encounteredError {
return errors.New("encountered an error while tearing down rules.")
}
Expand Down Expand Up @@ -754,10 +802,38 @@ func getProxyMode(proxyMode string, iptver iptables.IPTablesVersioner, kcompat i
return proxyModeUserspace
}

if len(proxyMode) > 0 && proxyMode != proxyModeIPTables {
glog.Warningf("Flag proxy-mode=%q unknown, assuming iptables proxy", proxyMode)
if len(proxyMode) > 0 && proxyMode == proxyModeIPTables {
return tryIPTablesProxy(iptver, kcompat)
}

if utilfeature.DefaultFeatureGate.Enabled(features.SupportIPVSProxyMode) {
if proxyMode == proxyModeIPVS {
return tryIPVSProxy(iptver, kcompat)
} else {
glog.Warningf("Can't use ipvs proxier, trying iptables proxier")
return tryIPTablesProxy(iptver, kcompat)
}
}
glog.Warningf("Flag proxy-mode=%q unknown, assuming iptables proxy", proxyMode)
return tryIPTablesProxy(iptver, kcompat)
}

func tryIPVSProxy(iptver iptables.IPTablesVersioner, kcompat iptables.KernelCompatTester) string {
// guaranteed false on error, error only necessary for debugging
// IPVS Proxier relies on iptables
useIPVSProxy, err := ipvs.CanUseIPVSProxier()
if err != nil {
utilruntime.HandleError(fmt.Errorf("can't determine whether to use ipvs proxy, using userspace proxier: %v", err))
return proxyModeUserspace
}
if useIPVSProxy {
return proxyModeIPVS
}

// TODO: Check ipvs version

// Try to fallback to iptables before falling back to userspace
glog.V(1).Infof("Can't use ipvs proxier, trying iptables proxier")
return tryIPTablesProxy(iptver, kcompat)
}

Expand Down
16 changes: 12 additions & 4 deletions cmd/kube-proxy/app/server_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,9 @@ iptables:
masqueradeBit: 17
minSyncPeriod: 10s
syncPeriod: 60s
ipvs:
minSyncPeriod: 10s
syncPeriod: 60s
kind: KubeProxyConfiguration
metricsBindAddress: "%s"
mode: "iptables"
Expand Down Expand Up @@ -347,12 +350,17 @@ udpTimeoutMilliseconds: 123ms
MinSyncPeriod: metav1.Duration{Duration: 10 * time.Second},
SyncPeriod: metav1.Duration{Duration: 60 * time.Second},
},
IPVS: componentconfig.KubeProxyIPVSConfiguration{
MinSyncPeriod: metav1.Duration{Duration: 10 * time.Second},
SyncPeriod: metav1.Duration{Duration: 60 * time.Second},
},
MetricsBindAddress: tc.metricsBindAddress,
Mode: "iptables",
OOMScoreAdj: utilpointer.Int32Ptr(17),
PortRange: "2-7",
ResourceContainer: "/foo",
UDPIdleTimeout: metav1.Duration{Duration: 123 * time.Millisecond},
// TODO: IPVS
OOMScoreAdj: utilpointer.Int32Ptr(17),
PortRange: "2-7",
ResourceContainer: "/foo",
UDPIdleTimeout: metav1.Duration{Duration: 123 * time.Millisecond},
}

options, err := NewOptions()
Expand Down
15 changes: 15 additions & 0 deletions pkg/apis/componentconfig/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,19 @@ type KubeProxyIPTablesConfiguration struct {
MinSyncPeriod metav1.Duration
}

// KubeProxyIPVSConfiguration contains ipvs-related configuration
// details for the Kubernetes proxy server.
type KubeProxyIPVSConfiguration struct {
// syncPeriod is the period that ipvs rules are refreshed (e.g. '5s', '1m',
// '2h22m'). Must be greater than 0.
SyncPeriod metav1.Duration
// minSyncPeriod is the minimum period that ipvs rules are refreshed (e.g. '5s', '1m',
// '2h22m').
MinSyncPeriod metav1.Duration
// ipvs scheduler
Scheduler string
}

// KubeProxyConntrackConfiguration contains conntrack settings for
// the Kubernetes proxy server.
type KubeProxyConntrackConfiguration struct {
Expand Down Expand Up @@ -112,6 +125,8 @@ type KubeProxyConfiguration struct {
ClientConnection ClientConnectionConfiguration
// iptables contains iptables-related configuration options.
IPTables KubeProxyIPTablesConfiguration
// ipvs contains ipvs-related configuration options.
IPVS KubeProxyIPVSConfiguration
// oomScoreAdj is the oom-score-adj value for kube-proxy process. Values must be within
// the range [-1000, 1000]
OOMScoreAdj *int32
Expand Down
15 changes: 15 additions & 0 deletions pkg/apis/componentconfig/v1alpha1/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,19 @@ type KubeProxyIPTablesConfiguration struct {
MinSyncPeriod metav1.Duration `json:"minSyncPeriod"`
}

// KubeProxyIPVSConfiguration contains ipvs-related configuration
// details for the Kubernetes proxy server.
type KubeProxyIPVSConfiguration struct {
// syncPeriod is the period that ipvs rules are refreshed (e.g. '5s', '1m',
// '2h22m'). Must be greater than 0.
SyncPeriod metav1.Duration `json:"syncPeriod"`
// minSyncPeriod is the minimum period that ipvs rules are refreshed (e.g. '5s', '1m',
// '2h22m').
MinSyncPeriod metav1.Duration `json:"minSyncPeriod"`
// ipvs scheduler
Scheduler string `json:"scheduler"`
}

// KubeProxyConntrackConfiguration contains conntrack settings for
// the Kubernetes proxy server.
type KubeProxyConntrackConfiguration struct {
Expand Down Expand Up @@ -112,6 +125,8 @@ type KubeProxyConfiguration struct {
ClientConnection ClientConnectionConfiguration `json:"clientConnection"`
// iptables contains iptables-related configuration options.
IPTables KubeProxyIPTablesConfiguration `json:"iptables"`
// ipvs contains ipvs-related configuration options.
IPVS KubeProxyIPVSConfiguration `json:"ipvs"`
// oomScoreAdj is the oom-score-adj value for kube-proxy process. Values must be within
// the range [-1000, 1000]
OOMScoreAdj *int32 `json:"oomScoreAdj"`
Expand Down
Loading

0 comments on commit 5ed2b44

Please sign in to comment.