Skip to content

Commit

Permalink
Merge pull request #22962 from ihmccreery/split-upgrade-jobs
Browse files Browse the repository at this point in the history
Add configuration for splitting upgrade jobs instead of using Jenkins steps
  • Loading branch information
Isaac Hollander McCreery committed Apr 5, 2016
2 parents 66bf12b + 873b499 commit 9a78608
Show file tree
Hide file tree
Showing 7 changed files with 170 additions and 52 deletions.
1 change: 1 addition & 0 deletions hack/ginkgo-e2e.sh
Original file line number Diff line number Diff line change
Expand Up @@ -107,4 +107,5 @@ export PATH=$(dirname "${e2e_test}"):"${PATH}"
${E2E_CLEAN_START:+"--clean-start=true"} \
${E2E_MIN_STARTUP_PODS:+"--minStartupPods=${E2E_MIN_STARTUP_PODS}"} \
${E2E_REPORT_DIR:+"--report-dir=${E2E_REPORT_DIR}"} \
${E2E_REPORT_PREFIX:+"--report-prefix=${E2E_REPORT_PREFIX}"} \
"${@:-}"
49 changes: 36 additions & 13 deletions hack/jenkins/e2e-runner.sh
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,22 @@ if [[ "${JENKINS_USE_TRUSTY_IMAGES:-}" =~ ^[yY]$ ]]; then
export KUBE_OS_DISTRIBUTION="trusty"
fi

function e2e_test() {
  # Runs one pass of the e2e suite through hack/e2e.go.
  #
  # Arguments:
  #   $1 - Ginkgo arguments, forwarded as a single --test_args=... flag.
  #        When empty, no --test_args flag is passed at all.
  # Globals:
  #   E2E_OPT (read)                   - extra, word-split options for hack/e2e.go
  #   E2E_PUBLISH_GREEN_VERSION (read) - "true" enables publishing on a green run
  #   exitcode (written)               - status of the --test invocation
  local -r test_args="${1}"

  # Verify that the cluster is up before running any tests. The status is
  # deliberately not captured here (presumably errexit in the calling script
  # aborts the job when this fails -- confirm).
  go run ./hack/e2e.go ${E2E_OPT:-} -v --isup

  # Jenkins reads test failures out of the junit*.xml files, so the status of
  # the test run is only recorded, never propagated as a nonzero exit.
  if go run ./hack/e2e.go ${E2E_OPT:-} -v --test \
    ${test_args:+--test_args="${test_args}"}; then
    exitcode=0
  else
    exitcode=$?
  fi

  # Nothing to publish unless publishing is enabled and the run was green.
  if [[ "${E2E_PUBLISH_GREEN_VERSION:-}" != "true" || ${exitcode} != 0 ]]; then
    return 0
  fi

  # Use plaintext version file packaged with kubernetes.tar.gz
  echo "Publish version to ci/latest-green.txt: $(cat version)"
  gsutil cp ./version gs://kubernetes-release/ci/latest-green.txt
}

echo "--------------------------------------------------------------------------------"
echo "Test Environment:"
printenv | sort
Expand Down Expand Up @@ -231,7 +247,7 @@ fi
### Pre Set Up ###
# Install gcloud from a custom path if provided. Used to test GKE with gcloud
# at HEAD, release candidate.
if [[ ! -z "${CLOUDSDK_BUCKET:-}" ]]; then
if [[ -n "${CLOUDSDK_BUCKET:-}" ]]; then
gsutil -mq cp -r "${CLOUDSDK_BUCKET}" ~
rm -rf ~/repo ~/cloudsdk
mv ~/$(basename "${CLOUDSDK_BUCKET}") ~/repo
Expand Down Expand Up @@ -267,21 +283,28 @@ if [[ "${E2E_UP,,}" == "true" ]]; then
fi
fi

### Run tests ###
# Jenkins will look at the junit*.xml files for test failures, so don't exit
# with a nonzero error code if it was only tests that failed.
if [[ "${E2E_TEST,,}" == "true" ]]; then
# Check to make sure the cluster is up before running tests, and fail if it's not.
go run ./hack/e2e.go ${E2E_OPT:-} -v --isup
go run ./hack/e2e.go ${E2E_OPT:-} -v --test \
${GINKGO_TEST_ARGS:+--test_args="${GINKGO_TEST_ARGS}"} \
&& exitcode=0 || exitcode=$?
if [[ "${E2E_PUBLISH_GREEN_VERSION:-}" == "true" && ${exitcode} == 0 && -n ${build_version:-} ]]; then
echo "Publish build_version to ci/latest-green.txt: ${build_version}"
gsutil cp ./version gs://kubernetes-release/ci/latest-green.txt
# Allow download & unpack of alternate version of tests, for cross-version & upgrade testing.
if [[ -n "${JENKINS_PUBLISHED_TEST_VERSION:-}" ]]; then
cd ..
mv kubernetes kubernetes_old
fetch_published_version_tars "${JENKINS_PUBLISHED_TEST_VERSION}"
cd kubernetes
# Upgrade the cluster before running other tests
if [[ "${E2E_UPGRADE_TEST,,}" == "true" ]]; then
# Add a report prefix for the upgrade tests so that their JUnit reports don't get
# overwritten when we run the rest of the e2es.
E2E_REPORT_PREFIX='upgrade' e2e_test "${GINKGO_UPGRADE_TEST_ARGS:-}"
# If JENKINS_USE_OLD_TESTS is set, back out into the old tests now that we've upgraded.
if [[ "${JENKINS_USE_OLD_TESTS:-}" == "true" ]]; then
cd ../kubernetes_old
fi
fi
fi

if [[ "${E2E_TEST,,}" == "true" ]]; then
e2e_test "${GINKGO_TEST_ARGS:-}"
fi

### Start Kubemark ###
if [[ "${USE_KUBEMARK:-}" == "true" ]]; then
export RUN_FROM_DISTRO=true
Expand Down
52 changes: 50 additions & 2 deletions hack/jenkins/job-configs/kubernetes-jenkins/kubernetes-e2e.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -572,7 +572,6 @@
jobs:
- 'kubernetes-e2e-{suffix}'

# ==============================================================================
# NOTE: From here on all jobs use Trusty as the image for master and/or nodes.
# Please add templates/groups/projects/jobs that use ContainerVm above/below
# this section (search "End of Trusty jobs" for the ending separator).
Expand Down Expand Up @@ -842,4 +841,53 @@
export KUBE_OS_DISTRIBUTION="trusty"
jobs:
- 'kubernetes-e2e-{suffix}'
#============================== End of Trusty jobs =============================

# End of Trusty jobs

- project:
name: kubernetes-e2e-gke-upgrades-experimental
trigger-job: 'kubernetes-build'
test-owner: 'ihmccreery'
emails: 'ihmccreery@google.com'
provider-env: '{gke-provider-env}'
suffix:
- 'gke-kubectl-skew-1.1-1.2':
description: 'Deploys a cluster at v1.1 and runs the v1.2 Kubectl tests.'
timeout: 30
job-env: |
export PROJECT="kube-jks-gke-upg-experimental"
export E2E_NAME="gke-ctl-skew-1-1-1-2"
export JENKINS_PUBLISHED_TEST_VERSION="ci/latest-1.2"
export GINKGO_TEST_ARGS="--ginkgo.focus=Kubectl"
export GINKGO_PARALLEL="y"
- 'gke-upgrade-master-1.1-1.2':
description: 'Deploys a cluster at v1.1, upgrades its master to v1.2, and runs v1.1 tests against it.'
timeout: 30
job-env: |
export PROJECT="kube-jks-gke-upg-experimental"
export E2E_NAME="gke-upg-mas-1-1-1-2"
export JENKINS_PUBLISHED_TEST_VERSION="ci/latest-1.2"
export E2E_UPGRADE_TEST="true"
export GINKGO_UPGRADE_TEST_ARGS="--ginkgo.focus=\[Feature:MasterUpgrade\] --upgrade-target=ci/latest-1.2"
export JENKINS_USE_OLD_TESTS="true"
- 'gke-upgrade-cluster-1.1-1.2':
description: 'Deploys a cluster at v1.1, upgrades the cluster to v1.2, and runs v1.1 tests against it.'
timeout: 30
job-env: |
export PROJECT="kube-jks-gke-upg-experimental"
export E2E_NAME="gke-upg-clu-1-1-1-2"
export JENKINS_PUBLISHED_TEST_VERSION="ci/latest-1.2"
export E2E_UPGRADE_TEST="true"
export GINKGO_UPGRADE_TEST_ARGS="--ginkgo.focus=\[Feature:ClusterUpgrade\] --upgrade-target=ci/latest-1.2"
export JENKINS_USE_OLD_TESTS="true"
- 'gke-upgrade-cluster-new-1.1-1.2':
description: 'Deploys a cluster at v1.1, upgrades the cluster to v1.2, and runs v1.2 tests against it.'
timeout: 30
job-env: |
export PROJECT="kube-jks-gke-upg-experimental"
export E2E_NAME="gke-upg-clu-new-1-1-1-2"
export JENKINS_PUBLISHED_TEST_VERSION="ci/latest-1.2"
export E2E_UPGRADE_TEST="true"
export GINKGO_UPGRADE_TEST_ARGS="--ginkgo.focus=\[Feature:ClusterUpgrade\] --upgrade-target=ci/latest-1.2"
jobs:
- 'kubernetes-e2e-{suffix}'
1 change: 1 addition & 0 deletions hack/verify-flags/known-flags.txt
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,7 @@ replicaset-lookup-cache-size
replication-controller-lookup-cache-size
repo-root
report-dir
report-prefix
required-contexts
resolv-conf
resource-container
Expand Down
115 changes: 79 additions & 36 deletions test/e2e/cluster_upgrade.go
Original file line number Diff line number Diff line change
Expand Up @@ -110,20 +110,52 @@ func nodeUpgradeGCE(rawV string) error {
// TODO(ihmccreery) This code path should be identical to how a user
// would trigger a node update; right now it's very different.
v := "v" + rawV

Logf("Getting the node template before the upgrade")
tmplBefore, err := migTemplate()
if err != nil {
return fmt.Errorf("error getting the node template before the upgrade: %v", err)
}

Logf("Preparing node upgrade by creating new instance template for %q", v)
stdout, _, err := runCmd(path.Join(testContext.RepoRoot, "cluster/gce/upgrade.sh"), "-P", v)
if err != nil {
return err
cleanupNodeUpgradeGCE(tmplBefore)
return fmt.Errorf("error preparing node upgrade: %v", err)
}
tmpl := strings.TrimSpace(stdout)

Logf("Performing a node upgrade to %q; waiting at most %v per node", tmpl, restartPerNodeTimeout)
if err := migRollingUpdate(tmpl, restartPerNodeTimeout); err != nil {
cleanupNodeUpgradeGCE(tmplBefore)
return fmt.Errorf("error doing node upgrade via a migRollingUpdate to %s: %v", tmpl, err)
}
return nil
}

// cleanupNodeUpgradeGCE deletes the instance template tmplBefore when the
// cluster's nodes no longer use it. It is best-effort: every failure is only
// logged and nothing is returned to the caller.
func cleanupNodeUpgradeGCE(tmplBefore string) {
	Logf("Cleaning up any unused node templates")

	tmplAfter, err := migTemplate()
	switch {
	case err != nil:
		// Without the current template we cannot tell whether tmplBefore
		// is still in use, so leave it alone and report the possible leak.
		Logf("Could not get node template post-upgrade; may have leaked template %s", tmplBefore)
		return
	case tmplAfter == tmplBefore:
		// The nodes still use the original template, i.e. the node
		// upgrade failed, so there is nothing to delete.
		Logf("Node template %s is still in use; not cleaning up", tmplBefore)
		return
	}

	Logf("Deleting node template %s", tmplBefore)
	projectFlag := fmt.Sprintf("--project=%s", testContext.CloudConfig.ProjectID)
	_, _, delErr := retryCmd("gcloud", "compute", "instance-templates",
		projectFlag,
		"delete",
		tmplBefore)
	if delErr == nil {
		return
	}
	Logf("gcloud compute instance-templates delete %s call failed with err: %v", tmplBefore, delErr)
	Logf("May have leaked instance template %q", tmplBefore)
}

func nodeUpgradeGKE(v string) error {
Logf("Upgrading nodes to %q", v)
_, _, err := runCmd("gcloud", "container",
Expand Down Expand Up @@ -204,43 +236,45 @@ var _ = KubeDescribe("Upgrade [Feature:Upgrade]", func() {
})

KubeDescribe("node upgrade", func() {
var tmplBefore, tmplAfter string
BeforeEach(func() {
if providerIs("gce") {
By("Getting the node template before the upgrade")
var err error
tmplBefore, err = migTemplate()
expectNoError(err)
}
It("should maintain a functioning cluster [Feature:NodeUpgrade]", func() {
By("Validating cluster before node upgrade")
expectNoError(validate(f, svcName, rcName, ingress, replicas))
By("Performing a node upgrade")
// Circumnavigate testUpgrade, since services don't necessarily stay up.
Logf("Starting upgrade")
expectNoError(nodeUpgrade(f, replicas, v))
Logf("Upgrade complete")
By("Checking node versions")
expectNoError(checkNodesVersions(f.Client, v))
By("Validating cluster after node upgrade")
expectNoError(validate(f, svcName, rcName, ingress, replicas))
})

AfterEach(func() {
if providerIs("gce") {
By("Cleaning up any unused node templates")
var err error
tmplAfter, err = migTemplate()
if err != nil {
Logf("Could not get node template post-upgrade; may have leaked template %s", tmplBefore)
return
}
if tmplBefore == tmplAfter {
// The node upgrade failed so there's no need to delete
// anything.
Logf("Node template %s is still in use; not cleaning up", tmplBefore)
return
}
Logf("Deleting node template %s", tmplBefore)
if _, _, err := retryCmd("gcloud", "compute", "instance-templates",
fmt.Sprintf("--project=%s", testContext.CloudConfig.ProjectID),
"delete",
tmplBefore); err != nil {
Logf("gcloud compute instance-templates delete %s call failed with err: %v", tmplBefore, err)
Logf("May have leaked instance template %q", tmplBefore)
}
}
It("should maintain responsive services [Feature:ExperimentalNodeUpgrade]", func() {
By("Validating cluster before node upgrade")
expectNoError(validate(f, svcName, rcName, ingress, replicas))
By("Performing a node upgrade")
testUpgrade(ip, v, func(v string) error {
return nodeUpgrade(f, replicas, v)
})
By("Checking node versions")
expectNoError(checkNodesVersions(f.Client, v))
By("Validating cluster after node upgrade")
expectNoError(validate(f, svcName, rcName, ingress, replicas))
})
})

KubeDescribe("cluster upgrade", func() {
It("should maintain responsive services [Feature:ClusterUpgrade]", func() {
By("Validating cluster before master upgrade")
expectNoError(validate(f, svcName, rcName, ingress, replicas))
By("Performing a master upgrade")
testUpgrade(ip, v, masterUpgrade)
By("Checking master version")
expectNoError(checkMasterVersion(f.Client, v))
By("Validating cluster after master upgrade")
expectNoError(validate(f, svcName, rcName, ingress, replicas))

It("should maintain a functioning cluster [Feature:NodeUpgrade]", func() {
By("Validating cluster before node upgrade")
expectNoError(validate(f, svcName, rcName, ingress, replicas))
By("Performing a node upgrade")
Expand All @@ -254,7 +288,16 @@ var _ = KubeDescribe("Upgrade [Feature:Upgrade]", func() {
expectNoError(validate(f, svcName, rcName, ingress, replicas))
})

It("should maintain responsive services [Feature:ExperimentalNodeUpgrade]", func() {
It("should maintain responsive services [Feature:ExperimentalClusterUpgrade]", func() {
By("Validating cluster before master upgrade")
expectNoError(validate(f, svcName, rcName, ingress, replicas))
By("Performing a master upgrade")
testUpgrade(ip, v, masterUpgrade)
By("Checking master version")
expectNoError(checkMasterVersion(f.Client, v))
By("Validating cluster after master upgrade")
expectNoError(validate(f, svcName, rcName, ingress, replicas))

By("Validating cluster before node upgrade")
expectNoError(validate(f, svcName, rcName, ingress, replicas))
By("Performing a node upgrade")
Expand Down Expand Up @@ -437,7 +480,7 @@ func migRollingUpdate(tmpl string, nt time.Duration) error {
return nil
}

// migTemplate (GCE/GKE-only) returns the name of the MIG template that the
// migTemplate (GCE-only) returns the name of the MIG template that the
// nodes of the cluster use.
func migTemplate() (string, error) {
var errLast error
Expand Down
3 changes: 2 additions & 1 deletion test/e2e/e2e.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ func RegisterFlags() {
flag.StringVar(&testContext.KubectlPath, "kubectl-path", "kubectl", "The kubectl binary to use. For development, you might use 'cluster/kubectl.sh' here.")
flag.StringVar(&testContext.OutputDir, "e2e-output-dir", "/tmp", "Output directory for interesting/useful test data, like performance data, benchmarks, and other metrics.")
flag.StringVar(&testContext.ReportDir, "report-dir", "", "Path to the directory where the JUnit XML reports should be saved. Default is empty, which doesn't generate these reports.")
flag.StringVar(&testContext.ReportPrefix, "report-prefix", "", "Optional prefix for JUnit XML reports. Default is empty, which doesn't prepend anything to the default name.")
flag.StringVar(&testContext.prefix, "prefix", "e2e", "A prefix to be added to cloud resources created during testing.")
flag.StringVar(&testContext.OSDistro, "os-distro", "debian", "The OS distribution of cluster VM instances (debian, trusty, or coreos).")

Expand Down Expand Up @@ -269,7 +270,7 @@ func RunE2ETests(t *testing.T) {
if err := os.MkdirAll(testContext.ReportDir, 0755); err != nil {
glog.Errorf("Failed creating report directory: %v", err)
} else {
r = append(r, reporters.NewJUnitReporter(path.Join(testContext.ReportDir, fmt.Sprintf("junit_%02d.xml", config.GinkgoConfig.ParallelNode))))
r = append(r, reporters.NewJUnitReporter(path.Join(testContext.ReportDir, fmt.Sprintf("junit_%v%02d.xml", testContext.ReportPrefix, config.GinkgoConfig.ParallelNode))))
}
}
glog.Infof("Starting e2e run %q on Ginkgo node %d", runId, config.GinkgoConfig.ParallelNode)
Expand Down
1 change: 1 addition & 0 deletions test/e2e/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,7 @@ type TestContextType struct {
KubectlPath string
OutputDir string
ReportDir string
ReportPrefix string
prefix string
MinStartupPods int
UpgradeTarget string
Expand Down

2 comments on commit 9a78608

@k8s-teamcity-mesosphere

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TeamCity OSS :: Kubernetes Mesos :: 4 - Smoke Tests Build 20622 outcome was SUCCESS
Summary: Tests passed: 1, ignored: 269 Build time: 00:05:17

@k8s-teamcity-mesosphere

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TeamCity OSS :: Kubernetes Mesos :: 4 - Smoke Tests Build 20623 outcome was FAILURE
Summary: Exit code 1 (new) Build time: 00:05:03

Please sign in to comment.