Skip to content

WatchList use more temp memory than legacy ListWatch #129467

Closed
@249043822

Description

What happened?

There are 9812 pods in kube-system. We start different numbers of pod informers against that namespace to test memory usage:

./pod_informer -count=10 -namespace=kube-system -timeout=2m -kubeconfig=/etc/kubernetes/kubeconfig -enableWatchListFeature=false -v=4
./pod_informer -count=20 -namespace=kube-system -timeout=2m -kubeconfig=/etc/kubernetes/kubeconfig -enableWatchListFeature=false -v=4
./pod_informer -count=30 -namespace=kube-system -timeout=2m -kubeconfig=/etc/kubernetes/kubeconfig -enableWatchListFeature=false -v=4
./pod_informer -count=40 -namespace=kube-system -timeout=2m -kubeconfig=/etc/kubernetes/kubeconfig -enableWatchListFeature=false -v=4

watchlist-off

./pod_informer -count=10 -namespace=kube-system -timeout=2m -kubeconfig=/etc/kubernetes/kubeconfig -enableWatchListFeature=true -v=4
./pod_informer -count=20 -namespace=kube-system -timeout=2m -kubeconfig=/etc/kubernetes/kubeconfig -enableWatchListFeature=true -v=4
./pod_informer -count=30 -namespace=kube-system -timeout=2m -kubeconfig=/etc/kubernetes/kubeconfig -enableWatchListFeature=true -v=4
./pod_informer -count=40 -namespace=kube-system -timeout=2m -kubeconfig=/etc/kubernetes/kubeconfig -enableWatchListFeature=true -v=4

watchlist-on

As we can see, with only 30 informers we lost the apiserver.

What did you expect to happen?

WatchList should use less temporary memory than the legacy ListWatch.

How can we reproduce it (as minimally and precisely as possible)?

We have 670722 pods in total in the watch cache:

etcdctl --cacert=/etc/kubernetes/certs/etcd/Etcdtrustca.crt --cert=/etc/kubernetes/certs/etcd/etcd-client.crt --key=/etc/kubernetes/certs/etcd/etcd-client.key --endpoints=$etcd_eps get /registry/pods --prefix --keys-only --command-timeout=10s| grep /registry/pods | wc -l
670722

The source code of the pod_informer binary:

/*
Copyright 2023 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package main

import (
    "context"
    "flag"
    "os"
    "time"

    "k8s.io/apimachinery/pkg/util/wait"
    v1 "k8s.io/client-go/informers/core/v1"
    "k8s.io/client-go/kubernetes"
    "k8s.io/client-go/tools/cache"
    "k8s.io/client-go/tools/clientcmd"
    "k8s.io/klog/v2"
)

// Command-line settings. registerFlags binds each variable to a flag and main
// reads them after flag.Parse.
var (
    // kubeconfig holds the -kubeconfig value; main passes it to
    // clientcmd.BuildConfigFromFlags.
    kubeconfig                  string
    // targetNamespace holds the -namespace value; the pod informers are
    // created for this namespace.
    targetNamespace             string
    // informerCount holds the -count value (flag default 4): how many pod
    // informers main starts.
    informerCount               int
    // testTimeout holds the -timeout value (flag default one minute); main
    // uses it as the deadline of the root context.
    testTimeout                 time.Duration
    // enableWatchListAlphaFeature holds the -enableWatchListFeature value;
    // when true, main sets ENABLE_CLIENT_GO_WATCH_LIST_ALPHA=true in the
    // process environment before building the client.
    enableWatchListAlphaFeature bool
)

// main parses the command-line flags, builds a Kubernetes client from the
// given kubeconfig, starts informerCount pod informers for targetNamespace and
// waits until every informer reports it has synced or testTimeout expires.
// The outcome (synced or not) and the elapsed time are logged before exiting.
func main() {
	registerFlags()
	flag.Parse()
	// Write out any buffered log entries when main returns normally;
	// klog.Fatal flushes on its own before exiting.
	defer klog.Flush()

	klog.Info("The test binary started with the following arguments:")
	flag.VisitAll(func(f *flag.Flag) {
		klog.Infof("  -%s=%v (%s)\n", f.Name, f.Value, f.Usage)
	})

	if enableWatchListAlphaFeature {
		// The variable must be in the environment before the client and the
		// informers are created below.
		// NOTE(review): newer client-go releases appear to read
		// KUBE_FEATURE_WatchListClient instead of this variable — confirm
		// against the client-go version this binary is built with.
		if err := os.Setenv("ENABLE_CLIENT_GO_WATCH_LIST_ALPHA", "true"); err != nil {
			klog.Fatal(err)
		}
	}

	// ctx bounds the whole test run: once testTimeout elapses the wait below
	// is abandoned and the informers are stopped.
	ctx, cancel := context.WithTimeout(context.Background(), testTimeout)
	defer cancel()

	config, err := clientcmd.BuildConfigFromFlags("", kubeconfig)
	if err != nil {
		klog.Fatal(err)
	}
	// Ask for protobuf first and fall back to JSON.
	config.AcceptContentTypes = "application/vnd.kubernetes.protobuf,application/json"
	config.ContentType = "application/vnd.kubernetes.protobuf"
	config.QPS = 100
	config.Burst = 200
	klog.Infof("The following Kubernetes client config will be used\n%v", config.String())

	client, err := kubernetes.NewForConfig(config)
	if err != nil {
		klog.Fatal(err)
	}

	ts := time.Now()
	ctxInformer, cancelInformers := context.WithCancel(ctx)
	defer cancelInformers()

	klog.Infof("Starting %d pod informers for targetNamespace = %s", informerCount, targetNamespace)
	informersSynced := startInformersFor(ctxInformer, client, informerCount, targetNamespace)

	klog.Info("Waiting for the pod informers to synced")
	// WaitForCacheSync returns false when the stop channel closes before all
	// informers have synced, so only claim success when it returns true.
	if cache.WaitForCacheSync(ctx.Done(), informersSynced...) {
		klog.Infof("All %v pod informers synced, time needed = %v", len(informersSynced), time.Since(ts))
	} else {
		klog.Errorf("Gave up waiting for %v pod informers to sync after %v (timeout = %v)", len(informersSynced), time.Since(ts), testTimeout)
	}

	klog.Info("Exiting the test app")
}

// registerFlags adds klog's logging flags and this binary's own flags to the
// default command-line flag set, binding each of the latter to its
// package-level variable. main calls it before flag.Parse.
func registerFlags() {
	// flag.StringVar and friends operate on flag.CommandLine; use it
	// explicitly so klog and the local flags visibly share one flag set.
	fs := flag.CommandLine
	klog.InitFlags(fs)

	fs.StringVar(
		&kubeconfig, "kubeconfig", "",
		"path to kubeconfig.",
	)
	fs.StringVar(
		&targetNamespace, "namespace", "",
		"namespace that host pods to list. If empty a default ('') value will be used.",
	)
	fs.IntVar(
		&informerCount, "count", 4,
		"the number of informers per targetNamespace to run. If empty a default (4) value will be used.",
	)
	fs.DurationVar(
		&testTimeout, "timeout", time.Minute,
		"timeout duration for the test",
	)
	fs.BoolVar(
		&enableWatchListAlphaFeature, "enableWatchListFeature", false,
		"whether to set ENABLE_CLIENT_GO_WATCH_LIST_ALPHA env var",
	)
}

// startInformersFor creates count pod informers for namespace using client and
// runs each one in its own goroutine until ctx is done. It returns the
// HasSynced function of every informer, in creation order, so the caller can
// wait for all of them. When count is zero or negative no informer is started
// and the returned slice is nil.
//
// The process is terminated via klog.Fatalf if a transform function cannot be
// installed on an informer.
func startInformersFor(ctx context.Context, client kubernetes.Interface, count int, namespace string) []cache.InformerSynced {
	var informersSynced []cache.InformerSynced
	for i := 0; i < count; i++ {
		inf := v1.NewPodInformer(client, namespace, time.Duration(0), cache.Indexers{})
		// The transform replaces every received object with nil — presumably
		// so the informers' local stores do not hold pod objects during the
		// measurement (TODO confirm intent). SetTransform reports an error,
		// which must not be dropped: without the transform the test would
		// measure something different.
		err := inf.SetTransform(func(interface{}) (interface{}, error) {
			return nil, nil
		})
		if err != nil {
			klog.Fatalf("Failed to set the transform function on pod informer %d: %v", i, err)
		}
		informersSynced = append(informersSynced, inf.HasSynced)
		// The informer goroutine stops when ctx is cancelled.
		go inf.Run(ctx.Done())
	}
	return informersSynced
}

Anything else we need to know?

No response

Kubernetes version

$ kubectl version
# paste output here

1.32.0

Cloud provider

OS version

# On Linux:
$ cat /etc/os-release
# paste output here
$ uname -a
# paste output here

# On Windows:
C:\> wmic os get Caption, Version, BuildNumber, OSArchitecture
# paste output here

Install tools

Container runtime (CRI) and version (if applicable)

Related plugins (CNI, CSI, ...) and versions (if applicable)

Metadata

Assignees

No one assigned

    Labels

    kind/bugCategorizes issue or PR as related to a bug.needs-triageIndicates an issue or PR lacks a `triage/foo` label and requires one.sig/api-machineryCategorizes an issue or PR as relevant to SIG API Machinery.sig/scalabilityCategorizes an issue or PR as relevant to SIG Scalability.

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions