Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Capture CNI logs in istioctl bug-report #45712

Merged
merged 3 commits into from
Jun 30, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions tools/bug-report/pkg/archive/archive.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ const (
clusterInfoSubdir = "cluster"
analyzeSubdir = "analyze"
operatorLogsPathSubdir = "operator"
cniLogsPathSubdir = "cni"
)

var (
Expand Down Expand Up @@ -68,6 +69,10 @@ func ClusterInfoPath(rootDir string) string {
return filepath.Join(getRootDir(rootDir), clusterInfoSubdir)
}

func CniPath(rootDir, pod string) string {
return filepath.Join(getRootDir(rootDir), cniLogsPathSubdir, pod)
}

// Create creates a gzipped tar file from srcDir and writes it to outPath.
func Create(srcDir, outPath string) error {
mw, err := os.Create(outPath)
Expand Down
42 changes: 33 additions & 9 deletions tools/bug-report/pkg/bugreport/bugreport.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ import (

"github.com/kr/pretty"
"github.com/spf13/cobra"

label2 "istio.io/api/label"
"istio.io/istio/istioctl/pkg/cli"
"istio.io/istio/istioctl/pkg/util/ambient"
Expand Down Expand Up @@ -158,7 +157,7 @@ func runBugReportCommand(ctx cli.Context, _ *cobra.Command, logOpts *log.Options
return err
}

common.LogAndPrintf("\n\nFetching proxy logs for the following containers:\n\n%s\n", strings.Join(paths, "\n"))
common.LogAndPrintf("\n\nFetching logs for the following containers:\n\n%s\n", strings.Join(paths, "\n"))

gatherInfo(runner, config, resources, paths)
if len(gErrors) != 0 {
Expand Down Expand Up @@ -312,6 +311,11 @@ func gatherInfo(runner *kubectlcmd.Runner, config *config.BugReportConfig, resou
getFromCluster(content.GetSecrets, params.SetVerbose(config.FullSecrets), clusterDir, &mandatoryWg)
getFromCluster(content.GetDescribePods, params.SetIstioNamespace(config.IstioNamespace), clusterDir, &mandatoryWg)

common.LogAndPrintf("\nFetching CNI logs from cluster.\n\n")
for _, cniPod := range resources.CniPod {
getCniLogs(runner, config, resources, cniPod.Name, &mandatoryWg)
}

// optionalWg is subject to timer.
var optionalWg sync.WaitGroup
for _, p := range paths {
Expand Down Expand Up @@ -390,7 +394,7 @@ func getProxyLogs(runner *kubectlcmd.Runner, config *config.BugReportConfig, res
path, namespace, pod, container string, wg *sync.WaitGroup,
) {
wg.Add(1)
log.Infof("Waiting on logs %s", pod)
log.Infof("Waiting on proxy logs %v/%v/%v", namespace, pod, container)
go func() {
defer func() {
wg.Done()
Expand All @@ -404,7 +408,7 @@ func getProxyLogs(runner *kubectlcmd.Runner, config *config.BugReportConfig, res
logs[path], stats[path], importance[path] = clog, cstat, imp
}
lock.Unlock()
log.Infof("Done with logs %s", pod)
log.Infof("Done with proxy logs %v/%v/%v", namespace, pod, container)
}()
}

Expand All @@ -414,7 +418,7 @@ func getIstiodLogs(runner *kubectlcmd.Runner, config *config.BugReportConfig, re
namespace, pod string, wg *sync.WaitGroup,
) {
wg.Add(1)
log.Infof("Waiting on logs %s", pod)
log.Infof("Waiting on Istiod logs for %v/%v", namespace, pod)
go func() {
defer func() {
wg.Done()
Expand All @@ -424,7 +428,7 @@ func getIstiodLogs(runner *kubectlcmd.Runner, config *config.BugReportConfig, re
clog, _, _, err := getLog(runner, resources, config, namespace, pod, common.DiscoveryContainerName)
appendGlobalErr(err)
writeFile(filepath.Join(archive.IstiodPath(tempDir, namespace, pod), "discovery.log"), clog, config.DryRun)
log.Infof("Done with logs %s", pod)
log.Infof("Done with Istiod logs for %v/%v", namespace, pod)
}()
}

Expand All @@ -433,7 +437,7 @@ func getOperatorLogs(runner *kubectlcmd.Runner, config *config.BugReportConfig,
namespace, pod string, wg *sync.WaitGroup,
) {
wg.Add(1)
log.Infof("Waiting on logs %s", pod)
log.Infof("Waiting on operator logs for %v/%v", namespace, pod)
go func() {
defer func() {
wg.Done()
Expand All @@ -443,7 +447,27 @@ func getOperatorLogs(runner *kubectlcmd.Runner, config *config.BugReportConfig,
clog, _, _, err := getLog(runner, resources, config, namespace, pod, common.OperatorContainerName)
appendGlobalErr(err)
writeFile(filepath.Join(archive.OperatorPath(tempDir, namespace, pod), "operator.log"), clog, config.DryRun)
log.Infof("Done with logs %s", pod)
log.Infof("Done with operator logs for %v/%v", namespace, pod)
}()
}

// getCniLogs fetches Cni logs from istio-cni-node daemonsets inside namespace kube-system and writes the output
// Runs if a goroutine, with errors reported through gErrors
func getCniLogs(runner *kubectlcmd.Runner, config *config.BugReportConfig, resources *cluster2.Resources,
pod string, wg *sync.WaitGroup,
) {
wg.Add(1)
log.Infof("Waiting on CNI logs for %v", pod)
go func() {
defer func() {
wg.Done()
logRuntime(time.Now(), "Done getting CNI logs for %v", pod)
}()

clog, _, _, err := getLog(runner, resources, config, common.KubeSystemNamespace, pod, "")
syw14 marked this conversation as resolved.
Show resolved Hide resolved
appendGlobalErr(err)
writeFile(filepath.Join(archive.CniPath(tempDir, pod), "cni.log"), clog, config.DryRun)
log.Infof("Done with CNI logs %v", pod)
}()
}

Expand All @@ -458,7 +482,7 @@ func getLog(runner *kubectlcmd.Runner, resources *cluster2.Resources, config *co
if err != nil {
return "", nil, 0, err
}
if resources.ContainerRestarts(namespace, pod, container) > 0 {
if resources.ContainerRestarts(namespace, pod, container, common.IsCniPod(pod)) > 0 {
pclog, err := runner.Logs(namespace, pod, container, true, config.DryRun)
if err != nil {
return "", nil, 0, err
Expand Down
17 changes: 15 additions & 2 deletions tools/bug-report/pkg/cluster/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,7 @@ func GetClusterResources(ctx context.Context, clientset *kubernetes.Clientset, c
Labels: make(map[string]map[string]string),
Annotations: make(map[string]map[string]string),
Pod: make(map[string]*corev1.Pod),
CniPod: make(map[string]*corev1.Pod),
}

pods, err := clientset.CoreV1().Pods("").List(ctx, metav1.ListOptions{})
Expand All @@ -230,6 +231,10 @@ func GetClusterResources(ctx context.Context, clientset *kubernetes.Clientset, c
}

for i, p := range pods.Items {
if p.Labels["k8s-app"] == "istio-cni-node" {
out.CniPod[PodKey(p.Namespace, p.Name)] = &pods.Items[i]
}

if analyzer_util.IsSystemNamespace(resource.Namespace(p.Namespace)) {
continue
}
Expand Down Expand Up @@ -276,6 +281,8 @@ type Resources struct {
Annotations map[string]map[string]string
// Pod maps a pod name to its Pod info. The key is namespace/pod-name.
Pod map[string]*corev1.Pod
// CniPod
CniPod map[string]*corev1.Pod
}

func (r *Resources) insertContainer(namespace, deployment, pod, container string) {
Expand All @@ -298,8 +305,14 @@ func (r *Resources) insertContainer(namespace, deployment, pod, container string
}

// ContainerRestarts returns the number of container restarts for the given container.
func (r *Resources) ContainerRestarts(namespace, pod, container string) int {
for _, cs := range r.Pod[PodKey(namespace, pod)].Status.ContainerStatuses {
func (r *Resources) ContainerRestarts(namespace, pod, container string, isCniPod bool) int {
var podItem *corev1.Pod
if isCniPod {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we need this? I thought r.Pod is a superset of r.CniPod

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added CniPod as a new field in Resource due to there's some pre defined config that will exclude all pods in namespace kube-system for r.Pod when building the "resource tree" of namespace/pod/container.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see...

It seems like a bit weird to have CNI code in ContainerRestarts, a bit fragile. Maybe it makes sense to have an AllPods one that isn't filtered...

Or we can keep this, not a big deal

podItem = r.CniPod[PodKey(namespace, pod)]
} else {
podItem = r.Pod[PodKey(namespace, pod)]
}
for _, cs := range podItem.Status.ContainerStatuses {
if cs.Name == container {
return int(cs.RestartCount)
}
Expand Down
10 changes: 8 additions & 2 deletions tools/bug-report/pkg/common/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ package common

import (
"fmt"
"strings"

"istio.io/istio/pkg/log"
)
Expand All @@ -30,8 +31,9 @@ const (
OperatorContainerName = "istio-operator"

// namespaceAll is the default argument of across all namespaces
NamespaceAll = ""
StrNamespaceAll = "allNamespaces"
NamespaceAll = ""
StrNamespaceAll = "allNamespaces"
KubeSystemNamespace = "kube-system"
)

type kv struct {
Expand Down Expand Up @@ -127,6 +129,10 @@ func IsOperatorContainer(_, container string) bool {
return container == OperatorContainerName
}

func IsCniPod(pod string) bool {
return strings.HasPrefix(pod, "istio-cni-node")
}

func getVersionKey(clusterVersion string) string {
if versionMap[clusterVersion] == nil {
return latestKey
Expand Down