Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Backport issue 32551 #36099

Merged
merged 3 commits into from
Nov 5, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 40 additions & 13 deletions cmd/kube-proxy/app/conntrack.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,24 +27,33 @@ import (
"k8s.io/kubernetes/pkg/util/sysctl"
)

// Conntracker is an interface to the global sysctl. Descriptions of the various
// sysctl fields can be found here:
//
// https://www.kernel.org/doc/Documentation/networking/nf_conntrack-sysctl.txt
type Conntracker interface {
// SetMax adjusts nf_conntrack_max.
SetMax(max int) error
// SetTCPEstablishedTimeout adjusts nf_conntrack_tcp_timeout_established.
SetTCPEstablishedTimeout(seconds int) error
// SetTCPCloseWaitTimeout nf_conntrack_tcp_timeout_close_wait.
SetTCPCloseWaitTimeout(seconds int) error
}

type realConntracker struct{}

var readOnlySysFSError = errors.New("ReadOnlySysFS")

func (realConntracker) SetMax(max int) error {
glog.Infof("Setting nf_conntrack_max to %d", max)
if err := sysctl.New().SetSysctl("net/netfilter/nf_conntrack_max", max); err != nil {
func (rct realConntracker) SetMax(max int) error {
if err := rct.setIntSysCtl("nf_conntrack_max", max); err != nil {
return err
}
// sysfs is expected to be mounted as 'rw'. However, it may be unexpectedly mounted as
// 'ro' by docker because of a known docker issue (https://github.com/docker/docker/issues/24000).
// Setting conntrack will fail when sysfs is readonly. When that happens, we don't set conntrack
// hashsize and return a special error readOnlySysFSError here. The caller should deal with
// sysfs is expected to be mounted as 'rw'. However, it may be
// unexpectedly mounted as 'ro' by docker because of a known docker
// issue (https://github.com/docker/docker/issues/24000). Setting
// conntrack will fail when sysfs is readonly. When that happens, we
// don't set conntrack hashsize and return a special error
// readOnlySysFSError here. The caller should deal with
// readOnlySysFSError differently.
writable, err := isSysFSWritable()
if err != nil {
Expand All @@ -58,9 +67,22 @@ func (realConntracker) SetMax(max int) error {
return ioutil.WriteFile("/sys/module/nf_conntrack/parameters/hashsize", []byte(strconv.Itoa(max/4)), 0640)
}

func (realConntracker) SetTCPEstablishedTimeout(seconds int) error {
glog.Infof("Setting nf_conntrack_tcp_timeout_established to %d", seconds)
return sysctl.New().SetSysctl("net/netfilter/nf_conntrack_tcp_timeout_established", seconds)
func (rct realConntracker) SetTCPEstablishedTimeout(seconds int) error {
return rct.setIntSysCtl("nf_conntrack_tcp_timeout_established", seconds)
}

func (rct realConntracker) SetTCPCloseWaitTimeout(seconds int) error {
return rct.setIntSysCtl("nf_conntrack_tcp_timeout_close_wait", seconds)
}

func (realConntracker) setIntSysCtl(name string, value int) error {
entry := "net/netfilter/" + name

glog.Infof("Set sysctl '%v' to %v", entry, value)
if err := sysctl.New().SetSysctl(entry, value); err != nil {
return err
}
return nil
}

// isSysFSWritable checks /proc/mounts to see whether sysfs is 'rw' or not.
Expand All @@ -73,16 +95,21 @@ func isSysFSWritable() (bool, error) {
glog.Errorf("failed to list mount points: %v", err)
return false, err
}

for _, mountPoint := range mountPoints {
const sysfsDevice = "sysfs"
if mountPoint.Device != sysfsDevice {
continue
}
// Check whether sysfs is 'rw'
const permWritable = "rw"
if len(mountPoint.Opts) > 0 && mountPoint.Opts[0] == permWritable {
return true, nil
}
glog.Errorf("sysfs is not writable: %+v", mountPoint)
break
glog.Errorf("sysfs is not writable: %+v (mount options are %v)",
mountPoint, mountPoint.Opts)
return false, readOnlySysFSError
}
return false, nil

return false, errors.New("No sysfs mounted")
}
5 changes: 5 additions & 0 deletions cmd/kube-proxy/app/options/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,5 +92,10 @@ func (s *ProxyServerConfig) AddFlags(fs *pflag.FlagSet) {
fs.Int32Var(&s.ConntrackMin, "conntrack-min", s.ConntrackMin,
"Minimum number of conntrack entries to allocate, regardless of conntrack-max-per-core (set conntrack-max-per-core=0 to leave the limit as-is).")
fs.DurationVar(&s.ConntrackTCPEstablishedTimeout.Duration, "conntrack-tcp-timeout-established", s.ConntrackTCPEstablishedTimeout.Duration, "Idle timeout for established TCP connections (0 to leave as-is)")
fs.DurationVar(
&s.ConntrackTCPCloseWaitTimeout.Duration, "conntrack-tcp-timeout-close-wait",
s.ConntrackTCPCloseWaitTimeout.Duration,
"NAT timeout for TCP connections in the CLOSE_WAIT state")

config.DefaultFeatureGate.AddFlag(fs)
}
14 changes: 12 additions & 2 deletions cmd/kube-proxy/app/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -315,12 +315,22 @@ func (s *ProxyServer) Run() error {
// administrator should decide whether and how to handle this issue,
// whether to drain the node and restart docker.
// TODO(random-liu): Remove this when the docker bug is fixed.
const message = "DOCKER RESTART NEEDED (docker issue #24000): /sys is read-only: can't raise conntrack limits, problems may arise later."
const message = "DOCKER RESTART NEEDED (docker issue #24000): /sys is read-only: " +
"cannot modify conntrack limits, problems may arise later."
s.Recorder.Eventf(s.Config.NodeRef, api.EventTypeWarning, err.Error(), message)
}
}

if s.Config.ConntrackTCPEstablishedTimeout.Duration > 0 {
if err := s.Conntracker.SetTCPEstablishedTimeout(int(s.Config.ConntrackTCPEstablishedTimeout.Duration / time.Second)); err != nil {
timeout := int(s.Config.ConntrackTCPEstablishedTimeout.Duration / time.Second)
if err := s.Conntracker.SetTCPEstablishedTimeout(timeout); err != nil {
return err
}
}

if s.Config.ConntrackTCPCloseWaitTimeout.Duration > 0 {
timeout := int(s.Config.ConntrackTCPCloseWaitTimeout.Duration / time.Second)
if err := s.Conntracker.SetTCPCloseWaitTimeout(timeout); err != nil {
return err
}
}
Expand Down
2 changes: 2 additions & 0 deletions hack/.linted_packages
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,8 @@ test/images/fakegitserver
test/images/goproxy
test/images/mount-tester
test/images/n-way-http
test/images/net
test/images/net/common
test/images/port-forward-tester
test/images/porter
test/images/resource-consumer/consume-cpu
Expand Down
1 change: 1 addition & 0 deletions hack/verify-flags/known-flags.txt
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ configure-cloud-routes
conntrack-max
conntrack-max-per-core
conntrack-min
conntrack-tcp-timeout-close-wait
conntrack-tcp-timeout-established
consumer-port
consumer-service-name
Expand Down
Loading