Skip to content

Commit

Permalink
Merge pull request kubernetes#387 from xueweiz/test-pr
Browse files Browse the repository at this point in the history
Add a few behavioral e2e tests
  • Loading branch information
k8s-ci-robot authored Dec 6, 2019
2 parents 7dc84e8 + 7d28dde commit 9d584df
Show file tree
Hide file tree
Showing 163 changed files with 182,678 additions and 63 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/bin/
/Dockerfile
/test/bin/
/*.tar.gz
ci.env
pr.env
Expand Down
19 changes: 15 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ PKG:=k8s.io/node-problem-detector
# PKG_SOURCES are all the go source code.
PKG_SOURCES:=$(shell find pkg cmd -name '*.go')

# PARALLEL specifies the number of parallel test nodes to run for e2e tests.
PARALLEL?=3

# TARBALL is the name of release tar. Include binary version by default.
TARBALL?=node-problem-detector-$(VERSION).tar.gz

Expand Down Expand Up @@ -103,6 +106,13 @@ endif
-tags "$(BUILD_TAGS)" \
./cmd/nodeproblemdetector

# Build the problem-maker binary, which is installed and used only for NPD e2e testing.
./test/bin/problem-maker: $(PKG_SOURCES)
CGO_ENABLED=$(CGO_ENABLED) GOOS=linux GO111MODULE=on go build \
-mod vendor \
-o test/bin/problem-maker \
-tags "$(BUILD_TAGS)" \
./test/e2e/problemmaker/problem_maker.go

Dockerfile: Dockerfile.in
sed -e 's|@BASEIMAGE@|$(BASEIMAGE)|g' $< >$@
ifneq ($(ENABLE_JOURNALD), 1)
Expand All @@ -115,8 +125,8 @@ test: vet fmt
GO111MODULE=on go test -mod vendor -timeout=1m -v -race -short -tags "$(BUILD_TAGS)" ./...

e2e-test: vet fmt build-tar
GO111MODULE=on go test -mod vendor -timeout=10m -v -tags "$(BUILD_TAGS)" \
./test/e2e/metriconly/... \
GO111MODULE=on ginkgo -nodes=$(PARALLEL) -mod vendor -timeout=10m -v -tags "$(BUILD_TAGS)" \
./test/e2e/metriconly/... -- \
-project=$(PROJECT) -zone=$(ZONE) \
-image=$(VM_IMAGE) -image-family=$(IMAGE_FAMILY) -image-project=$(IMAGE_PROJECT) \
-ssh-user=$(SSH_USER) -ssh-key=$(SSH_KEY) \
Expand All @@ -129,8 +139,8 @@ build-binaries: ./bin/node-problem-detector ./bin/log-counter
build-container: build-binaries Dockerfile
docker build -t $(IMAGE) .

build-tar: ./bin/node-problem-detector ./bin/log-counter
tar -zcvf $(TARBALL) bin/ config/ test/e2e-install.sh
build-tar: ./bin/node-problem-detector ./bin/log-counter ./test/bin/problem-maker
tar -zcvf $(TARBALL) bin/ config/ test/e2e-install.sh test/bin/problem-maker
sha1sum $(TARBALL)
md5sum $(TARBALL)

Expand All @@ -156,4 +166,5 @@ push: push-container push-tar
# clean removes the built binaries (including the e2e problem-maker) and release tarballs.
clean:
rm -f bin/log-counter
rm -f bin/node-problem-detector
rm -f test/bin/problem-maker
rm -f node-problem-detector-*.tar.gz
20 changes: 20 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,26 @@ Kubernetes cluster to a healthy state. The following remedy systems exist:
[this issue](https://github.com/kubernetes/node-problem-detector/issues/199)
for an example production use case for Draino.

# Testing

NPD is tested via unit tests, [NPD e2e tests](https://github.com/kubernetes/node-problem-detector/blob/master/test/e2e/README.md), Kubernetes e2e tests and Kubernetes nodes e2e tests. Prow handles the [pre-submit tests](https://github.com/kubernetes/test-infra/blob/master/config/jobs/kubernetes/node-problem-detector/node-problem-detector-presubmits.yaml) and [CI tests](https://github.com/kubernetes/test-infra/blob/master/config/jobs/kubernetes/node-problem-detector/node-problem-detector-ci.yaml).

CI test results can be found below:
1. [Unit tests](https://k8s-testgrid.appspot.com/sig-node-node-problem-detector#ci-npd-test)
2. [NPD e2e tests](https://k8s-testgrid.appspot.com/sig-node-node-problem-detector#ci-npd-e2e-test)
3. [Kubernetes e2e tests](https://k8s-testgrid.appspot.com/sig-node-node-problem-detector#ci-npd-e2e-kubernetes-gce-gci)
4. [Kubernetes nodes e2e tests](https://k8s-testgrid.appspot.com/sig-node-node-problem-detector#ci-npd-e2e-node)

## Running tests

Unit tests are run via `make test`.

See [NPD e2e test documentation](https://github.com/kubernetes/node-problem-detector/blob/master/test/e2e/README.md) for how to set up and run NPD e2e tests.

## Problem Maker

[Problem maker](https://github.com/kubernetes/node-problem-detector/blob/master/test/e2e/problemmaker/README.md) is a program used in NPD e2e tests to generate/simulate node problems. It is ONLY intended to be used by NPD e2e tests. Please do NOT run it on your workstation, as it could cause real node problems.

# Docs

* [Custom plugin monitor](docs/custom_plugin_monitor.md)
Expand Down
10 changes: 10 additions & 0 deletions config/kernel-monitor.json
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,16 @@
"reason": "KernelOops",
"pattern": "divide error: 0000 \\[#\\d+\\] SMP"
},
{
"type": "temporary",
"reason": "Ext4Error",
"pattern": "EXT4-fs error .*"
},
{
"type": "temporary",
"reason": "Ext4Warning",
"pattern": "EXT4-fs warning .*"
},
{
"type": "permanent",
"condition": "KernelDeadlock",
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ require (
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b
github.com/google/cadvisor v0.33.0
github.com/onsi/ginkgo v1.8.0
github.com/onsi/gomega v1.5.0 // indirect
github.com/onsi/gomega v1.7.0
github.com/pborman/uuid v1.2.0
github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90
github.com/prometheus/common v0.4.1
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,8 @@ github.com/onsi/gomega v1.4.2/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1Cpa
github.com/onsi/gomega v1.4.3/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY=
github.com/onsi/gomega v1.5.0 h1:izbySO9zDPmjJ8rDjLvkA2zJHIo+HkYXHnf7eN7SSyo=
github.com/onsi/gomega v1.5.0/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY=
github.com/onsi/gomega v1.7.0 h1:XPnZz8VVBHjVsy1vzJmRwIcSwiUO+JFfrv/xGiigmME=
github.com/onsi/gomega v1.7.0/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY=
github.com/opencontainers/go-digest v1.0.0-rc1/go.mod h1:cMLVZDEM3+U2I4VmLI6N8jQYUd2OVphdqWwCJHrFt2s=
github.com/opencontainers/image-spec v1.0.1/go.mod h1:BtxoFyWECRxE4U/7sNtV5W15zMzWCbyJoFRP3s7yZA0=
github.com/openzipkin/zipkin-go v0.1.1/go.mod h1:NtoC/o8u3JlF1lSlyPNswIbeQH9bJTmOf0Erfk+hxe8=
Expand Down
3 changes: 3 additions & 0 deletions test/e2e-install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,9 @@ function install-npd() {
echo "Installing NPD systemd service."
cp "${workdir}"/config/systemd/node-problem-detector-metric-only.service /etc/systemd/system/node-problem-detector.service

echo "Installing problem maker binary, used only for e2e testing."
cp "${workdir}"/test/bin/problem-maker "${BIN_DIR}"

rm -rf "${workdir}"

# Start systemd service.
Expand Down
3 changes: 2 additions & 1 deletion test/e2e/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Currently the tests only support Google Compute Engine (GCE) environment. Suppor

## Prerequisites

1. Setup [Google Application Default Credentials](https://developers.google.com/identity/protocols/application-default-credentials), which is [required for authentication](https://godoc.org/google.golang.org/api/compute/v1#hdr-Creating_a_client) by the Compute Engine API.
1. Setup [Google Application Default Credentials (ADC)](https://developers.google.com/identity/protocols/application-default-credentials), which is [required for authentication](https://godoc.org/google.golang.org/api/compute/v1#hdr-Creating_a_client) by the Compute Engine API.
2. Setup a [project-wide SSH key](https://cloud.google.com/compute/docs/instances/adding-removing-ssh-keys#project-wide) that can be used to SSH into the GCE VMs.

## Running tests
Expand All @@ -21,5 +21,6 @@ export VM_IMAGE=[TESTED_OS_IMAGE:cos-73-11647-217-0]
export IMAGE_PROJECT=[TESTED_OS_IMAGE_PROJECT:cos-cloud]
export SSH_USER=${USER}
export SSH_KEY=~/.ssh/id_rsa
export ARTIFACTS=/tmp/npd
make e2e-test
```
17 changes: 15 additions & 2 deletions test/e2e/lib/gce/instance.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (

"k8s.io/node-problem-detector/test/e2e/lib/ssh"

. "github.com/onsi/gomega"
compute "google.golang.org/api/compute/v1"
)

Expand Down Expand Up @@ -145,14 +146,26 @@ func (ins *Instance) RunCommand(cmd string) ssh.Result {
return ssh.Run(cmd, ins.ExternalIP, ins.SshUser, ins.SshKey)
}

// RunCommandOrFail runs a command on the GCE instance and returns the command result.
// It asserts (via gomega) that the SSH connection succeeded and that the command
// exited with code 0, so a failure on either fails the calling test.
func (ins *Instance) RunCommandOrFail(cmd string) ssh.Result {
	res := ins.RunCommand(cmd)

	// The SSH transport must have worked before the exit code is meaningful.
	Expect(res.SSHError).ToNot(HaveOccurred(), "SSH-ing to the instance failed: %v\n", res)
	Expect(res.Code).To(Equal(0), "Running command failed: %v\n", res)

	return res
}

// PushFile pushes a local file to a GCE instance.
func (ins *Instance) PushFile(srcPath, destPath string) error {
if ins.ExternalIP == "" {
ins.populateExternalIP()
}
return exec.Command("scp", "-o", "StrictHostKeyChecking no",
output, err := exec.Command("scp", "-o", "StrictHostKeyChecking no",
"-i", ins.SshKey,
srcPath, fmt.Sprintf("%s@%s:%s", ins.SshUser, ins.ExternalIP, destPath)).Run()
srcPath, fmt.Sprintf("%s@%s:%s", ins.SshUser, ins.ExternalIP, destPath)).CombinedOutput()
if err != nil {
return fmt.Errorf("Error running scp: %v.\nHere is the output for the command: %v", err, string(output))
}
return nil
}

// DeleteInstance deletes a GCE instance.
Expand Down
60 changes: 37 additions & 23 deletions test/e2e/lib/npd/npd.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,6 @@ import (
"github.com/avast/retry-go"
)

const npdMetricsFilename = "node-problem-detector-metrics.txt"
const npdLogsFilename = "node-problem-detector.log"

// SetupNPD installs NPD from the test tarball onto the provided GCE instance.
//
// Here is how it works:
Expand Down Expand Up @@ -91,6 +88,20 @@ func FetchNPDMetrics(ins gce.Instance) ([]metrics.Float64MetricRepresentation, e
return npdMetrics, nil
}

// FetchNPDMetric fetches all metrics reported by NPD on the provided GCE instance and
// returns the value of the single metric identified by metricName and labels.
//
// An error is returned when the metrics cannot be fetched, or when no matching metric
// is found; in the latter case the error message lists every metric NPD exported.
func FetchNPDMetric(ins gce.Instance, metricName string, labels map[string]string) (float64, error) {
	allMetrics, fetchErr := FetchNPDMetrics(ins)
	if fetchErr != nil {
		return 0, fetchErr
	}

	found, lookupErr := metrics.GetFloat64Metric(allMetrics, metricName, labels, true)
	if lookupErr == nil {
		return found.Value, nil
	}

	// Include the full metric dump to make a failed lookup debuggable.
	return 0, fmt.Errorf("Failed to find %s metric with label %v: %v.\nHere is all NPD exported metrics: %v",
		metricName, labels, lookupErr, allMetrics)
}

// WaitForNPD waits for NPD to become ready by waiting for expected metrics.
func WaitForNPD(ins gce.Instance, metricNames []string, timeoutSeconds uint) error {
verifyMetricExist := func() error {
Expand All @@ -116,30 +127,33 @@ func WaitForNPD(ins gce.Instance, metricNames []string, timeoutSeconds uint) err
}

// SaveTestArtifacts saves debugging data from NPD.
func SaveTestArtifacts(ins gce.Instance, directory string) []error {
func SaveTestArtifacts(ins gce.Instance, artifactDirectory string, testID int) []error {
var errs []error

npdMetrics := ins.RunCommand("curl http://localhost:20257/metrics")
if npdMetrics.SSHError != nil || npdMetrics.Code != 0 {
errs = append(errs, fmt.Errorf("Error fetching NPD metrics: %v\n", npdMetrics))
} else {
npdMetricsPath := path.Join(directory, npdMetricsFilename)
err := ioutil.WriteFile(npdMetricsPath, []byte(npdMetrics.Stdout), 0644)
if err != nil {
errs = append(errs, fmt.Errorf("Error writing to %s: %v", npdMetricsPath, err))
}
if err := saveCommandResultAsArtifact(ins, artifactDirectory, testID,
"curl http://localhost:20257/metrics", "node-problem-detector-metrics"); err != nil {
errs = append(errs, err)
}

npdLog := ins.RunCommand("sudo journalctl -u node-problem-detector.service")
if npdLog.SSHError != nil || npdLog.Code != 0 {
errs = append(errs, fmt.Errorf("Error fetching NPD logs: %v\n", npdLog))
} else {
npdLogsPath := path.Join(directory, npdLogsFilename)
err := ioutil.WriteFile(npdLogsPath, []byte(npdLog.Stdout), 0644)
if err != nil {
errs = append(errs, fmt.Errorf("Error writing to %s: %v", npdLogsPath, err))
}
if err := saveCommandResultAsArtifact(ins, artifactDirectory, testID,
"sudo journalctl -u node-problem-detector.service", "node-problem-detector"); err != nil {
errs = append(errs, err)
}
if err := saveCommandResultAsArtifact(ins, artifactDirectory, testID,
"sudo journalctl -k", "kernel-logs"); err != nil {
errs = append(errs, err)
}

return errs
}

// saveCommandResultAsArtifact runs command on the instance and writes its stdout to
// "<artifactPrefix>-<testID>.txt" inside artifactDirectory, with testID zero-padded
// to two digits.
//
// It returns an error if SSH fails, the command exits with a non-zero code, or the
// artifact file cannot be written.
func saveCommandResultAsArtifact(ins gce.Instance, artifactDirectory string, testID int, command string, artifactPrefix string) error {
	res := ins.RunCommand(command)
	if res.SSHError != nil || res.Code != 0 {
		return fmt.Errorf("Error running command: %v\n", res)
	}

	fileName := fmt.Sprintf("%v-%02d.txt", artifactPrefix, testID)
	destination := path.Join(artifactDirectory, fileName)

	writeErr := ioutil.WriteFile(destination, []byte(res.Stdout), 0644)
	if writeErr != nil {
		return fmt.Errorf("Error writing artifact to %v: %v\n", destination, writeErr)
	}
	return nil
}
18 changes: 8 additions & 10 deletions test/e2e/metriconly/e2e_npd_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,12 @@ import (
"k8s.io/test-infra/boskos/client"

"github.com/onsi/ginkgo"
"github.com/onsi/ginkgo/config"
"github.com/onsi/ginkgo/reporters"
. "github.com/onsi/gomega"
compute "google.golang.org/api/compute/v1"
)

const junitFileName = "junit.xml"

var zone = flag.String("zone", "", "gce zone the hosts live in")
var project = flag.String("project", "", "gce project the hosts live in")
var image = flag.String("image", "", "image to test")
Expand All @@ -49,7 +49,7 @@ var boskosProjectType = flag.String("boskos-project-type", "gce-project",
"specifies which project type to select from Boskos.")
var boskosServerURL = flag.String("boskos-server-url", "http://boskos.test-pods.svc.cluster.local",
"specifies Boskos server URL.")
var boskosWaitDuration = flag.Duration("boskos-wait-duration", 5*time.Minute,
var boskosWaitDuration = flag.Duration("boskos-wait-duration", 2*time.Minute,
"Duration to wait before quitting getting Boskos resource.")

var computeService *compute.Service
Expand Down Expand Up @@ -80,7 +80,7 @@ func TestNPD(t *testing.T) {
}

// The junit formatted result output is for showing test results on testgrid.
junitReporter := reporters.NewJUnitReporter(path.Join(*artifactsDir, junitFileName))
junitReporter := reporters.NewJUnitReporter(path.Join(*artifactsDir, fmt.Sprintf("junit-%02d.xml", config.GinkgoConfig.ParallelNode)))
ginkgo.RunSpecsWithDefaultAndCustomReporters(t, "NPD Metric-only Suite", []ginkgo.Reporter{junitReporter})
}

Expand All @@ -89,9 +89,8 @@ func acquireProjectOrDie(boskosClient *client.Client) string {
ctx, cancel := context.WithTimeout(context.Background(), *boskosWaitDuration)
defer cancel()
p, err := boskosClient.AcquireWait(ctx, *boskosProjectType, "free", "busy")
if err != nil {
panic(fmt.Sprintf("Unable to rent project from Boskos: %v\n", err))
}
Expect(err).NotTo(HaveOccurred(), fmt.Sprintf("Unable to rent project from Boskos: %v\n", err))

fmt.Printf("Rented project %s from Boskos", p.Name)

go func(boskosClient *client.Client, projectName string) {
Expand All @@ -110,12 +109,11 @@ func releaseProjectOrDie(boskosClient *client.Client) {
return
}
err := boskosClient.ReleaseAll("dirty")
if err != nil {
panic(fmt.Sprintf("Failed to release project to Boskos: %v", err))
}
Expect(err).NotTo(HaveOccurred(), fmt.Sprintf("Failed to release project to Boskos: %v", err))
}

func TestMain(m *testing.M) {
RegisterFailHandler(ginkgo.Fail)
flag.Parse()

os.Exit(m.Run())
Expand Down
Loading

0 comments on commit 9d584df

Please sign in to comment.