Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add dynamic config metrics #57527

Merged
merged 1 commit into from
May 24, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pkg/kubelet/kubeletconfig/status/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ go_library(
importpath = "k8s.io/kubernetes/pkg/kubelet/kubeletconfig/status",
deps = [
"//pkg/kubelet/kubeletconfig/util/log:go_default_library",
"//pkg/kubelet/metrics:go_default_library",
"//pkg/util/node:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
Expand Down
19 changes: 19 additions & 0 deletions pkg/kubelet/kubeletconfig/status/status.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"k8s.io/apimachinery/pkg/types"
clientset "k8s.io/client-go/kubernetes"
utillog "k8s.io/kubernetes/pkg/kubelet/kubeletconfig/util/log"
"k8s.io/kubernetes/pkg/kubelet/metrics"
nodeutil "k8s.io/kubernetes/pkg/util/node"
)

Expand Down Expand Up @@ -176,6 +177,24 @@ func (s *nodeConfigStatus) Sync(client clientset.Interface, nodeName string) {
status.Error = s.errorOverride
}

// update metrics based on the status we will sync
metrics.SetConfigError(len(status.Error) > 0)
err = metrics.SetAssignedConfig(status.Assigned)
if err != nil {
err = fmt.Errorf("failed to update Assigned config metric, error: %v", err)
return
}
err = metrics.SetActiveConfig(status.Active)
if err != nil {
err = fmt.Errorf("failed to update Active config metric, error: %v", err)
return
}
err = metrics.SetLastKnownGoodConfig(status.LastKnownGood)
if err != nil {
err = fmt.Errorf("failed to update LastKnownGood config metric, error: %v", err)
return
}

// apply the status to a copy of the node so we don't modify the object in the informer's store
newNode := oldNode.DeepCopy()
newNode.Status.Config = status
Expand Down
3 changes: 3 additions & 0 deletions pkg/kubelet/metrics/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,12 @@ go_library(
srcs = ["metrics.go"],
importpath = "k8s.io/kubernetes/pkg/kubelet/metrics",
deps = [
"//pkg/features:go_default_library",
"//pkg/kubelet/container:go_default_library",
"//vendor/github.com/golang/glog:go_default_library",
"//vendor/github.com/prometheus/client_golang/prometheus:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library",
],
)

Expand Down
140 changes: 140 additions & 0 deletions pkg/kubelet/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,15 @@ limitations under the License.
package metrics

import (
"fmt"
"sync"
"time"

"github.com/golang/glog"
"github.com/prometheus/client_golang/prometheus"
corev1 "k8s.io/api/core/v1"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/kubernetes/pkg/features"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
)

Expand All @@ -47,6 +51,17 @@ const (
// Metrics keys of device plugin operations
DevicePluginRegistrationCountKey = "device_plugin_registration_count"
DevicePluginAllocationLatencyKey = "device_plugin_alloc_latency_microseconds"

// Metric keys for node config
AssignedConfigKey = "node_config_assigned"
ActiveConfigKey = "node_config_active"
LastKnownGoodConfigKey = "node_config_last_known_good"
ConfigErrorKey = "node_config_error"
ConfigSourceLabelKey = "node_config_source"
ConfigSourceLabelValueLocal = "local"
ConfigUIDLabelKey = "node_config_uid"
ConfigResourceVersionLabelKey = "node_config_resource_version"
KubeletConfigKeyLabelKey = "node_config_kubelet_key"
)

var (
Expand Down Expand Up @@ -150,6 +165,40 @@ var (
},
[]string{"resource_name"},
)

// Metrics for node config

AssignedConfig = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Subsystem: KubeletSubsystem,
Name: AssignedConfigKey,
Help: "The node's understanding of intended config. The count is always 1.",
},
[]string{ConfigSourceLabelKey, ConfigUIDLabelKey, ConfigResourceVersionLabelKey, KubeletConfigKeyLabelKey},
)
ActiveConfig = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Subsystem: KubeletSubsystem,
Name: ActiveConfigKey,
Help: "The config source the node is actively using. The count is always 1.",
},
[]string{ConfigSourceLabelKey, ConfigUIDLabelKey, ConfigResourceVersionLabelKey, KubeletConfigKeyLabelKey},
)
LastKnownGoodConfig = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Subsystem: KubeletSubsystem,
Name: LastKnownGoodConfigKey,
Help: "The config source the node will fall back to when it encounters certain errors. The count is always 1.",
},
[]string{ConfigSourceLabelKey, ConfigUIDLabelKey, ConfigResourceVersionLabelKey, KubeletConfigKeyLabelKey},
)
ConfigError = prometheus.NewGauge(
prometheus.GaugeOpts{
Subsystem: KubeletSubsystem,
Name: ConfigErrorKey,
Help: "This metric is true (1) if the node is experiencing a configuration-related error, false (0) otherwise.",
},
)
)

var registerMetrics sync.Once
Expand All @@ -172,6 +221,12 @@ func Register(containerCache kubecontainer.RuntimeCache, collectors ...prometheu
prometheus.MustRegister(EvictionStatsAge)
prometheus.MustRegister(DevicePluginRegistrationCount)
prometheus.MustRegister(DevicePluginAllocationLatency)
if utilfeature.DefaultFeatureGate.Enabled(features.DynamicKubeletConfig) {
prometheus.MustRegister(AssignedConfig)
prometheus.MustRegister(ActiveConfig)
prometheus.MustRegister(LastKnownGoodConfig)
prometheus.MustRegister(ConfigError)
}
for _, collector := range collectors {
prometheus.MustRegister(collector)
}
Expand Down Expand Up @@ -232,3 +287,88 @@ func (pc *podAndContainerCollector) Collect(ch chan<- prometheus.Metric) {
prometheus.GaugeValue,
float64(runningContainers))
}

const configMapAPIPathFmt = "/api/v1/namespaces/%s/configmaps/%s"

func configLabels(source *corev1.NodeConfigSource) (map[string]string, error) {
if source == nil {
return map[string]string{
// prometheus requires all of the labels that can be set on the metric
ConfigSourceLabelKey: "local",
ConfigUIDLabelKey: "",
ConfigResourceVersionLabelKey: "",
KubeletConfigKeyLabelKey: "",
}, nil
}
if source.ConfigMap != nil {
return map[string]string{
ConfigSourceLabelKey: fmt.Sprintf(configMapAPIPathFmt, source.ConfigMap.Namespace, source.ConfigMap.Name),
ConfigUIDLabelKey: string(source.ConfigMap.UID),
ConfigResourceVersionLabelKey: source.ConfigMap.ResourceVersion,
KubeletConfigKeyLabelKey: source.ConfigMap.KubeletConfigKey,
}, nil
}
return nil, fmt.Errorf("unrecognized config source type, all source subfields were nil")
}

// track labels across metric updates, so we can delete old label sets and prevent leaks
var assignedConfigLabels map[string]string = map[string]string{}

func SetAssignedConfig(source *corev1.NodeConfigSource) error {
// compute the timeseries labels from the source
labels, err := configLabels(source)
if err != nil {
return err
}
// clean up the old timeseries (WithLabelValues creates a new one for each distinct label set)
AssignedConfig.Delete(assignedConfigLabels)
// record the new timeseries
assignedConfigLabels = labels
// expose the new timeseries with a constant count of 1
AssignedConfig.With(assignedConfigLabels).Set(1)
return nil
}

// track labels across metric updates, so we can delete old label sets and prevent leaks
var activeConfigLabels map[string]string = map[string]string{}

func SetActiveConfig(source *corev1.NodeConfigSource) error {
// compute the timeseries labels from the source
labels, err := configLabels(source)
if err != nil {
return err
}
// clean up the old timeseries (WithLabelValues creates a new one for each distinct label set)
ActiveConfig.Delete(activeConfigLabels)
// record the new timeseries
activeConfigLabels = labels
// expose the new timeseries with a constant count of 1
ActiveConfig.With(activeConfigLabels).Set(1)
return nil
}

// track labels across metric updates, so we can delete old label sets and prevent leaks
var lastKnownGoodConfigLabels map[string]string = map[string]string{}

func SetLastKnownGoodConfig(source *corev1.NodeConfigSource) error {
// compute the timeseries labels from the source
labels, err := configLabels(source)
if err != nil {
return err
}
// clean up the old timeseries (WithLabelValues creates a new one for each distinct label set)
LastKnownGoodConfig.Delete(lastKnownGoodConfigLabels)
// record the new timeseries
lastKnownGoodConfigLabels = labels
// expose the new timeseries with a constant count of 1
LastKnownGoodConfig.With(lastKnownGoodConfigLabels).Set(1)
return nil
}

func SetConfigError(err bool) {
if err {
ConfigError.Set(1)
} else {
ConfigError.Set(0)
}
}
3 changes: 2 additions & 1 deletion test/e2e_node/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ go_test(
"//pkg/kubelet/types:go_default_library",
"//pkg/security/apparmor:go_default_library",
"//test/e2e/framework:go_default_library",
"//test/e2e/framework/metrics:go_default_library",
"//test/e2e_node/services:go_default_library",
"//test/utils/image:go_default_library",
"//vendor/github.com/blang/semver:go_default_library",
Expand All @@ -147,6 +148,7 @@ go_test(
"//vendor/github.com/onsi/gomega:go_default_library",
"//vendor/github.com/onsi/gomega/gstruct:go_default_library",
"//vendor/github.com/onsi/gomega/types:go_default_library",
"//vendor/github.com/prometheus/common/model:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/equality:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/errors:go_default_library",
Expand All @@ -164,7 +166,6 @@ go_test(
] + select({
"@io_bazel_rules_go//go/platform:linux": [
"//test/e2e/common:go_default_library",
"//test/e2e/framework/metrics:go_default_library",
"//test/e2e_node/system:go_default_library",
"//test/utils:go_default_library",
"//vendor/github.com/kardianos/osext:go_default_library",
Expand Down
Loading