Skip to content

Commit

Permalink
Modify presubmits to support testing with v1alpha2 (kubeflow#632)
Browse files Browse the repository at this point in the history
* Changes to support v1alpha2 testing in presubmits.

* The tests are currently disabled because they aren't passing yet: the
  termination policy isn't handled correctly (kubeflow#634)

* Changed the v1alpha2 test to use the same smoke test as v1alpha1 instead
  of mnist.
  mnist was causing problems because of issues downloading the data;
  see kubeflow/kubeflow#974

* We want a simpler test that allows for more direct testing of the distributed
  communication pattern
* Also mnist is expensive in that it tries to download data.

* Add a parameter tfJobVersion to the deploy script so we can control
  whether we deploy v1alpha1 or v1alpha2

* Parameterize the E2E test workflow by the TFJob version we want to run.

* update test-app - We need to pull in a version of the app which
  has the TFJobVersion flag.

* Create a script to regenerate the test-app for future use.

Related to kubeflow#589

* Fix versionTag logic; we need to allow for the case where versionTag is an
empty string.
  • Loading branch information
jlewi authored and Penghui Yan committed Jun 18, 2018
1 parent 3900ad8 commit af9f990
Show file tree
Hide file tree
Showing 61 changed files with 3,921 additions and 151,815 deletions.
10 changes: 10 additions & 0 deletions prow_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,13 @@ workflows:
- app_dir: kubeflow/tf-operator/test/workflows
component: workflows
name: tfjob-e2e
params:
tfJobVersion: v1alpha1
# TODO(https://github.com/kubeflow/tf-operator/issues/634)
# Enable the v1alpha2 workflow once the job completes successfully
# when the master completes.
#- app_dir: kubeflow/tf-operator/test/workflows
# component: workflows
# name: tfjob-e2e-v1alpha2
# params:
# tfJobVersion: v1alpha2
17 changes: 15 additions & 2 deletions py/deploy.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ def setup(args):
"tfJobImage": args.image,
"name": "kubeflow-core",
"namespace": args.namespace,
"tfJobVersion": args.tf_job_version,
}

component = "core"
Expand All @@ -171,8 +172,15 @@ def setup(args):
util.setup_cluster(api_client)

# Verify that the TfJob operator is actually deployed.
tf_job_deployment_name = "tf-job-operator"
logging.info("Verifying TfJob controller started.")
if args.tf_job_version == "v1alpha1":
tf_job_deployment_name = "tf-job-operator"
elif args.tf_job_version == "v1alpha2":
tf_job_deployment_name = "tf-job-operator-v1alpha2"
else:
raise ValueError(
"Unrecognized value for tf_job_version %s" % args.tf_job_version)
logging.info("Verifying TfJob deployment %s started.",
tf_job_deployment_name)

# TODO(jlewi): We should verify that the image of the operator is the correct one.
util.wait_for_deployment(api_client, args.namespace, tf_job_deployment_name)
Expand Down Expand Up @@ -247,6 +255,11 @@ def main(): # pylint: disable=too-many-locals
action="append",
help="Accelerator to add to the cluster. Should be of the form type=count.")

parser_setup.add_argument(
"--tf_job_version",
dest="tf_job_version",
help="Which version of the TFJobOperator to deploy.")

parser_setup.set_defaults(func=setup)
add_common_args(parser_setup)

Expand Down
50 changes: 50 additions & 0 deletions test/recreate_app.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#!/bin/bash
#
# A simple script to recreate the Kubeflow test app.
#
# Steps performed:
#   1. If <script dir>/test-app already exists, ask for confirmation and
#      delete it (anything other than "y" aborts with exit status 1).
#   2. Run `ks init` to create a fresh app and point its default environment
#      at ${NAMESPACE}.
#   3. Add the kubeflow registry, install the core package and generate the
#      kubeflow-core component.
#   4. Download and run kubeflow's autoformat_jsonnet.sh from the parent
#      directory of this script.
#
# Requires `ks` and `curl` on the PATH.
#
set -ex

# Namespace set on the default ksonnet environment
NAMESPACE=kubeflow

# Which version of Kubeflow to use
# For a list of releases refer to:
# https://github.com/kubeflow/kubeflow/releases
VERSION=master
# Kubernetes API spec version passed to `ks init`
API_VERSION=v1.7.0

# Absolute path of the directory containing this script
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

cd "${DIR}"

APP_NAME=test-app

if [ -d "${DIR}/${APP_NAME}" ]; then
  echo "Directory ${DIR}/${APP_NAME} exists"
  echo "Do you want to delete ${DIR}/${APP_NAME} y/n[n]:"
  # On EOF read returns non-zero; treat that as the default answer ("n")
  # instead of letting `set -e` kill the script without a message.
  read -r response || response=""

  # The spaces around == are required: without them the test sees a single
  # non-empty word and is always true, deleting the directory regardless of
  # the answer.
  if [ "${response}" == "y" ]; then
    # :? guards against ever expanding to "rm -r /" if a variable is empty.
    rm -r -- "${DIR:?}/${APP_NAME:?}"
  else
    echo "Aborting"
    exit 1
  fi
fi

ks init "${APP_NAME}" --api-spec="version:${API_VERSION}"
cd "${APP_NAME}"
ks env set default --namespace "${NAMESPACE}"

# Install Kubeflow components
ks registry add kubeflow "github.com/kubeflow/kubeflow/tree/${VERSION}/kubeflow"

ks pkg install "kubeflow/core@${VERSION}"

# Create templates for core components
ks generate kubeflow-core core

# Run autoformat from the git root
cd "${DIR}/.."
bash <(curl -s "https://raw.githubusercontent.com/kubeflow/kubeflow/${VERSION}/scripts/autoformat_jsonnet.sh")
4 changes: 4 additions & 0 deletions test/test-app/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
/lib
/.ksonnet/registries
/app.override.yaml
/.ks_environment

This file was deleted.

This file was deleted.

10 changes: 5 additions & 5 deletions test/test-app/app.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,29 +2,29 @@ apiVersion: 0.1.0
environments:
default:
destination:
namespace: kubeflow-releasing
server: https://35.226.49.107
namespace: kubeflow
server: https://35.196.213.148
k8sVersion: v1.7.0
path: default
kind: ksonnet.io/app
libraries:
core:
gitVersion:
commitSha: 845f2a02e6ef4e25cae8555a37924d3510d07b36
commitSha: f7a68336ad7a65c2cbba8462e89d24a10626687e
refSpec: master
name: core
registry: kubeflow
name: test-app
registries:
incubator:
gitVersion:
commitSha: 422d521c05aa905df949868143b26445f5e4eda5
commitSha: 40285d8a14f1ac5787e405e1023cf0c07f6aa28c
refSpec: master
protocol: github
uri: github.com/ksonnet/parts/tree/master/incubator
kubeflow:
gitVersion:
commitSha: 845f2a02e6ef4e25cae8555a37924d3510d07b36
commitSha: f7a68336ad7a65c2cbba8462e89d24a10626687e
refSpec: master
protocol: github
uri: github.com/kubeflow/kubeflow/tree/master/kubeflow
Expand Down
12 changes: 10 additions & 2 deletions test/test-app/components/core.jsonnet
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
local env = std.extVar("__ksonnet/environments");
local params = std.extVar("__ksonnet/params").components.core;
local k = import 'k.libsonnet';

local k = import "k.libsonnet";
local all = import "kubeflow/core/all.libsonnet";

std.prune(k.core.v1.list.new(all.parts(params).all))
// updatedParams uses the environment namespace if
// the namespace parameter is not explicitly set
local updatedParams = params {
namespace: if params.namespace == "null" then env.namespace else params.namespace,
};

std.prune(k.core.v1.list.new(all.parts(updatedParams).all))
15 changes: 12 additions & 3 deletions test/test-app/components/params.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,22 @@
cloud: "null",
disks: "null",
jupyterHubAuthenticator: "null",
jupyterHubImage: "gcr.io/kubeflow/jupyterhub-k8s:1.0.1",
jupyterHubImage: "gcr.io/kubeflow/jupyterhub-k8s:v20180531-3bb991b1",
jupyterHubServiceType: "ClusterIP",
jupyterNotebookPVCMount: "null",
jupyterNotebookRegistry: "gcr.io",
jupyterNotebookRepoName: "kubeflow-images-public",
name: "core",
namespace: "default",
namespace: "null",
reportUsage: "false",
tfAmbassadorImage: "quay.io/datawire/ambassador:0.30.1",
tfAmbassadorServiceType: "ClusterIP",
tfDefaultImage: "null",
tfJobImage: "gcr.io/tf-on-k8s-dogfood/tf_operator:v20180131-cabc1c0-dirty-e3b0c44",
tfJobImage: "gcr.io/kubeflow-images-public/tf_operator:v20180522-77375baf",
tfJobUiServiceType: "ClusterIP",
tfJobVersion: "v1alpha1",
tfStatsdImage: "quay.io/datawire/statsd:0.30.1",
usageId: "unknown_cluster",
},
},
}
80 changes: 0 additions & 80 deletions test/test-app/environments/default/.metadata/k.libsonnet

This file was deleted.

Loading

0 comments on commit af9f990

Please sign in to comment.