Skip to content

Commit

Permalink
Modify NB Dockerfile and start scripts to detect PVC mount for /home/…
Browse files Browse the repository at this point in the history
…jovyan (#786)
  • Loading branch information
Pete MacKinnon authored and k8s-ci-robot committed May 23, 2018
1 parent 12fe49a commit 2a68f29
Show file tree
Hide file tree
Showing 7 changed files with 82 additions and 32 deletions.
2 changes: 1 addition & 1 deletion bootstrap/cmd/bootstrap/app/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -394,7 +394,7 @@ func Run(opt *options.ServerOption) error {

pvcMount := ""
if hasDefault {
pvcMount = "/home/jovyan/work"
pvcMount = "/home/jovyan"
}

err = actions.RunParamSet(map[string]interface{}{
Expand Down
58 changes: 31 additions & 27 deletions components/tensorflow-notebook-image/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@ ENV DEBIAN_FRONTEND noninteractive
ENV NB_USER jovyan
ENV NB_UID 1000
ENV HOME /home/$NB_USER
ENV CONDA_DIR=$HOME/.conda
# We prefer to have a global conda install
# to minimize the amount of content in $HOME
ENV CONDA_DIR=/opt/conda
ENV PATH $CONDA_DIR/bin:$PATH

# Use bash instead of sh
Expand Down Expand Up @@ -50,27 +52,17 @@ ENV LANG en_US.UTF-8
ENV LANGUAGE en_US.UTF-8

# Create jovyan user with UID=1000 and in the 'users' group
RUN useradd -m -s /bin/bash -N -u $NB_UID $NB_USER && \
chown -R ${NB_USER}:users /usr/local/bin
# but allow for non-initial launches of the notebook to have
# $HOME provided by the contents of a PV
RUN useradd -M -s /bin/bash -N -u $NB_UID $NB_USER && \
chown -R ${NB_USER}:users /usr/local/bin && \
mkdir -p $HOME

RUN export CLOUD_SDK_REPO="cloud-sdk-$(lsb_release -c -s)" && \
echo "deb https://packages.cloud.google.com/apt $CLOUD_SDK_REPO main" > /etc/apt/sources.list.d/google-cloud-sdk.list && \
curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - && \
apt-get update && \
apt-get install -y google-cloud-sdk kubectl && \
gcloud config set core/disable_usage_reporting true && \
gcloud config set component_manager/disable_update_check true && \
gcloud config set metrics/environment github_docker_image

RUN chown -R ${NB_USER}:users /home/${NB_USER}

# Run everything below this as $NB_USER
USER $NB_USER

WORKDIR $HOME

# Setup work directory for backward-compatibility
RUN mkdir /home/$NB_USER/work
apt-get install -y google-cloud-sdk kubectl

# Install Tini - used as entrypoint for container
RUN cd /tmp && \
Expand All @@ -94,6 +86,10 @@ RUN cd /tmp && \
conda update conda && \
conda clean -tipsy

# NOTE: Beyond this point be careful of breaking out
# or otherwise adding new layers with RUN, chown, etc.
# The image size can grow significantly.

# Install base python3 packages
RUN pip install --upgrade pip && \
pip --no-cache-dir install \
Expand Down Expand Up @@ -126,23 +122,31 @@ RUN conda create -n py2 python=2 && \
pip install --no-cache-dir tensorflow-model-analysis && \
jupyter nbextension install --py --symlink tensorflow_model_analysis --user && \
jupyter nbextension enable --py tensorflow_model_analysis --user; \
fi

# Install jupyterlab-manager
RUN conda install --quiet --yes \
fi \
&& \
# Install jupyterlab-manager
conda install --quiet --yes \
# nodejs required for jupyterlab-manager
nodejs && \
jupyter labextension install @jupyter-widgets/jupyterlab-manager
jupyter labextension install @jupyter-widgets/jupyterlab-manager && \
# Do chown in this layer for significant size savings
chown -R ${NB_USER}:users $HOME && \
chown -R ${NB_USER}:users $CONDA_DIR

# Install common packages from requirements.txt for both python2 and python3
COPY --chown=jovyan:users requirements.txt $HOME/requirements.txt
RUN pip --no-cache-dir install -r $HOME/requirements.txt && \
# NB: the COPY chown can't expand a bash variable for NB_USER
COPY --chown=jovyan:users requirements.txt /tmp
COPY --chown=jovyan:users jupyter_notebook_config.py /tmp
RUN pip --no-cache-dir install -r /tmp/requirements.txt && \
source activate py2 && \
pip --no-cache-dir install -r $HOME/requirements.txt
pip --no-cache-dir install -r /tmp/requirements.txt

# Wipe $HOME for PVC detection later
WORKDIR $HOME
RUN rm -fr $(ls -A $HOME)

# Copy over init scripts
COPY --chown=jovyan:users start-singleuser.sh start-notebook.sh start.sh /usr/local/bin/
COPY --chown=jovyan:users jupyter_notebook_config.py $HOME/.jupyter/
COPY --chown=jovyan:users start-singleuser.sh start-notebook.sh start.sh pvc-check.sh /usr/local/bin/
RUN chmod a+rx /usr/local/bin/*

# Configure container startup
Expand Down
39 changes: 39 additions & 0 deletions components/tensorflow-notebook-image/pvc-check.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#!/bin/bash

# Copyright 2016 The Kubeflow Authors All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Initialise $HOME the first time an empty volume is mounted there.
#
# This file is sourced (not executed) by start-singleuser.sh, so it must not
# call `exit`, and the variables set below stay visible to the caller.
#
# Reads:  HOME, NB_USER
# Effect: when $HOME has no content, create $HOME/work and $HOME/.jupyter,
#         copy the notebook config that the image keeps in /tmp, and give
#         both directories to $NB_USER:users. A $HOME that already has
#         content is left untouched.

# stored in the NB Dockerfile
SRC_CONF=/tmp/jupyter_notebook_config.py
WORK_DIR="$HOME/work"
CONF_DIR="$HOME/.jupyter"

echo "checking if $HOME volume needs init..."

# mkfs on ext2/3/4 creates lost+found at the root of a new filesystem, so a
# freshly provisioned volume is not literally empty. Ignore that entry, or
# the volume would be mistaken for one that has already been initialised.
if [ -n "$(ls -A --ignore='lost+found' "$HOME")" ]; then
  # assume we are working with an existing volume via a PVC
  echo "...$HOME already has content..."
else
  # clean volume, needs init
  echo "...creating $WORK_DIR"
  # -p: do not fail if a directory is already there
  mkdir -p "$WORK_DIR"
  mkdir -p "$CONF_DIR"

  echo "...load initial content into $HOME..."
  cp "$SRC_CONF" "$CONF_DIR"

  # Only the two directories created above exist at this point. Name them by
  # absolute path so the chown does not depend on the current directory and
  # does not touch lost+found.
  chown -R "$NB_USER:users" "$WORK_DIR" "$CONF_DIR"
fi

echo "...done"
3 changes: 3 additions & 0 deletions components/tensorflow-notebook-image/start-singleuser.sh
Original file line number Diff line number Diff line change
Expand Up @@ -46,4 +46,7 @@ if [ ! -z "$JPY_HUB_API_URL" ]; then
NOTEBOOK_ARGS="--hub-api-url=$JPY_HUB_API_URL $NOTEBOOK_ARGS"
fi

# check to see if a PV has been mounted
. /usr/local/bin/pvc-check.sh

. /usr/local/bin/start.sh jupyterhub-singleuser $NOTEBOOK_ARGS $@
6 changes: 5 additions & 1 deletion kubeflow/core/kubeform_spawner.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,14 +98,18 @@ def extra_resource_limits(self):
c.KubeSpawner.start_timeout = 60 * 30
# Increase timeout to 5 minutes to avoid HTTP 500 errors on JupyterHub
c.KubeSpawner.http_timeout = 60 * 5

# Volume setup
c.KubeSpawner.singleuser_uid = 1000
c.KubeSpawner.singleuser_fs_gid = 100
c.KubeSpawner.singleuser_working_dir = '/home/jovyan'
volumes = []
volume_mounts = []
###################################################
# Persistent volume options
###################################################
# Using persistent storage requires a default storage class.
# TODO(jlewi): Verify this works on minikube.
# TODO(jlewi): Should we set c.KubeSpawner.singleuser_fs_gid = 1000
# see https://github.com/kubeflow/kubeflow/pull/22#issuecomment-350500944
pvc_mount = os.environ.get('NOTEBOOK_PVC_MOUNT')
if pvc_mount and pvc_mount != 'null':
Expand Down
2 changes: 1 addition & 1 deletion kubeflow/core/tests/jupyterhub_test.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ local params = {
jupyterHubAuthenticator:: null,
jupyterHubServiceType:: "ClusterIP",
jupyterHubImage: "gcr.io/kubeflow/jupyterhub-k8s:1.0.1",
jupyterNotebookPVCMount: "/home/jovyan/work",
jupyterNotebookPVCMount: "/home/jovyan",
cloud: null,
};

Expand Down
4 changes: 2 additions & 2 deletions user_guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -126,10 +126,10 @@ kubectl get storageclass
parameter to create a volume that will be mounted within the notebook

```
ks param set kubeflow-core jupyterNotebookPVCMount /home/jovyan/work
ks param set kubeflow-core jupyterNotebookPVCMount /home/jovyan
```

* Here we mount the volume at `/home/jovyan/work` because the notebook
* Here we mount the volume at `/home/jovyan` because the notebook
always executes as user jovyan
* The selected directory will be stored on whatever storage is the default
for the cluster (typically some form of persistent disk)
Expand Down

0 comments on commit 2a68f29

Please sign in to comment.