diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000000..79ae76af206
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,32 @@
+# pkg and bin directories currently contain build artifacts
+# only so we exclude them.
+bin/
+vendor/
+
+.vscode/
+
+# Compiled python files.
+*.pyc
+
+# Emacs temporary files
+*~
+
+# Other temporary files
+.DS_Store
+
+# Files created by Gogland IDE
+.idea/
+
+# Exclude wheel files for now.
+# The only wheel file is the TF wheel one which is quite large.
+# We don't want to check that into source control because it could be
+# quite large.
+*.whl
+
+# Bazel files
+**/bazel-*
+# Examples egg
+examples/tf_sample/tf_sample.egg-info/
+examples/.ipynb_checkpoints/
+
+**/.ipynb_checkpoints
diff --git a/manifests/Makefile b/manifests/Makefile
new file mode 100644
index 00000000000..30daeabe052
--- /dev/null
+++ b/manifests/Makefile
@@ -0,0 +1,19 @@
+
+# Create a yaml template to deploy the CRD
+# Requires the template plugin
+# https://github.com/technosophos/helm-template
+CHART := https://storage.googleapis.com/tf-on-k8s-dogfood-releases/latest/tf-job-operator-chart-latest.tgz
+# Scratch directory where the chart is downloaded and unpacked.
+BUILD_DIR := /tmp/tfjob_config_builder
+
+# deploy_config names a command, not a file it produces, so always run it.
+.PHONY: deploy_config
+deploy_config:
+	rm -rf $(BUILD_DIR)
+	mkdir -p $(BUILD_DIR)
+	wget -O $(BUILD_DIR)/tf-job-operator-chart-latest.tgz $(CHART)
+	tar -C $(BUILD_DIR) -xvf $(BUILD_DIR)/tf-job-operator-chart-latest.tgz
+	# We set the templates to render because we don't want to render the tests.
+	helm template $(BUILD_DIR)/tf-job-operator-chart --set cloud=gke,rbac.install=true \
+		-x ./templates/config.yaml -x ./templates/deployment.yaml -x ./templates/rbac.yaml -x ./templates/service-account.yaml > deploy_crd.yaml
+
diff --git a/manifests/deploy_crd.yaml b/manifests/deploy_crd.yaml
new file mode 100644
index 00000000000..d3562fd081c
--- /dev/null
+++ b/manifests/deploy_crd.yaml
@@ -0,0 +1,139 @@
+---
+# Source: tf-job-operator-chart/templates/config.yaml
+
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: tf-job-operator-config
+data:
+  controller_config_file.yaml: |
+    grpcServerFilePath: /opt/mlkube/grpc_tensorflow_server/grpc_tensorflow_server.py
+    accelerators:
+      alpha.kubernetes.io/nvidia-gpu:
+        volumes:
+          - name: nvidia-libraries
+            mountPath: /usr/local/nvidia/lib64 # This path is special; it is expected to be present in `/etc/ld.so.conf` inside the container image.
+            hostPath: /home/kubernetes/bin/nvidia/lib
+          - name: nvidia-debug-tools # optional
+            mountPath: /usr/local/bin/nvidia
+            hostPath: /home/kubernetes/bin/nvidia/bin
+
+---
+# Source: tf-job-operator-chart/templates/service-account.yaml
+
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: tf-job-operator
+  labels:
+    app: tf-job-operator
+
+---
+# Source: tf-job-operator-chart/templates/rbac.yaml
+
+apiVersion: rbac.authorization.k8s.io/v1beta1
+kind: ClusterRole
+metadata:
+  name: tf-job-operator
+  labels:
+    app: tf-job-operator
+rules:
+- apiGroups:
+  - tensorflow.org
+  resources:
+  - tfjobs
+  verbs:
+  - "*"
+- apiGroups:
+  - apiextensions.k8s.io
+  resources:
+  - customresourcedefinitions
+  verbs:
+  - "*"
+- apiGroups:
+  - storage.k8s.io
+  resources:
+  - storageclasses
+  verbs:
+  - "*"
+- apiGroups:
+  - batch
+  resources:
+  - jobs
+  verbs:
+  - "*"
+- apiGroups:
+  - ""
+  resources:
+  - configmaps
+  - pods
+  - services
+  - endpoints
+  - persistentvolumeclaims
+  - events
+  verbs:
+  - "*"
+- apiGroups:
+  - apps
+  - extensions
+  resources:
+  - deployments
+  verbs:
+  - "*"
+---
+kind: ClusterRoleBinding
+apiVersion: rbac.authorization.k8s.io/v1beta1
+metadata:
+  name: tf-job-operator
+  labels:
+    app: tf-job-operator
+subjects:
+- kind: ServiceAccount
+  name: tf-job-operator
+  namespace: default
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: tf-job-operator
+
+
+---
+# Source: tf-job-operator-chart/templates/deployment.yaml
+apiVersion: extensions/v1beta1
+kind: Deployment
+metadata:
+  name: tf-job-operator
+spec:
+  replicas: 1
+  template:
+    metadata:
+      labels:
+        name: tf-job-operator
+    spec:
+      serviceAccountName: tf-job-operator
+      containers:
+      - name: tf-job-operator
+        image: gcr.io/tf-on-k8s-dogfood/tf_operator:v20171129-f8ec762
+        command:
+          - /opt/mlkube/tf_operator
+          - --controller_config_file=/etc/config/controller_config_file.yaml
+          - -alsologtostderr
+          - -v=1
+        env:
+          - name: MY_POD_NAMESPACE
+            valueFrom:
+              fieldRef:
+                fieldPath: metadata.namespace
+          - name: MY_POD_NAME
+            valueFrom:
+              fieldRef:
+                fieldPath: metadata.name
+
+        volumeMounts:
+        - name: config-volume
+          mountPath: /etc/config
+      volumes:
+        - name: config-volume
+          configMap:
+            name: tf-job-operator-config
+