
Commit

Merge pull request zenml-io#3 from zenml-io/feature/ENG-771-update-zenbytes-and-zenfiles-with-zen-ml-version-0-7-3

Update ZenBytes to be compatible with ZenML 0.7.3.
htahir1 authored May 4, 2022
2 parents eb8b877 + 6954f7e commit 9fab1c1
Showing 7 changed files with 118 additions and 92 deletions.
47 changes: 21 additions & 26 deletions 01 - Building a ML(Ops) pipeline.ipynb
@@ -14,6 +14,15 @@
"![Test](_assets/Logo/zenml.svg)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%pip install matplotlib"
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -81,7 +90,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Add a Model Deployer to your Stack"
"## Add MLFlow Experiment Tracker and Model Deployer to your stack"
]
},
{
@@ -90,11 +99,14 @@
"metadata": {},
"outputs": [],
"source": [
"# Register the MLflow experiment tracker\n",
"!zenml experiment-tracker register mlflow_tracker --type=mlflow\n",
"\n",
"# Register the MLflow model deployer\n",
"!zenml model-deployer register mlflow --type=mlflow\n",
"\n",
"# Create a new stack with MLflow deployer in it\n",
"!zenml stack register local_with_mlflow -m default -a default -o default -d mlflow"
"# Create a new stack with MLflow experiment tracker and deployer in it\n",
"!zenml stack register local_with_mlflow -m default -a default -o default -e mlflow_tracker -d mlflow"
]
},
{
@@ -129,7 +141,7 @@
"metadata": {},
"source": [
"## Define Steps\n",
"In the code that follows, you can see that we are defining the various steps of our pipeline. Each step is decorated with @step, the main abstraction that is currently available for creating pipeline steps."
"In the code that follows, you can see that we are defining the various steps of our pipeline. Each step is decorated with `@step`, the main abstraction that is currently available for creating pipeline steps."
]
},
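As a minimal sketch of what such a step looks like (the step name, dataset, and output names below are illustrative assumptions, not necessarily the notebook's exact code), the `@step` decorator turns a plain Python function into a tracked pipeline step:

```python
import numpy as np
from sklearn.datasets import load_digits

from zenml.steps import step, Output


# Illustrative only: a data-loading step whose two outputs are named X and y.
@step
def importer() -> Output(X=np.ndarray, y=np.ndarray):
    """Load the sklearn digits dataset and return features and labels."""
    digits = load_digits()
    return digits.data, digits.target
```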
{
@@ -698,13 +710,8 @@
"# This will start a serving process for mlflow \n",
"# - if you want to continue in the notebook you need to manually\n",
"# interrupt the kernel \n",
"from zenml.environment import Environment\n",
"from zenml.integrations.mlflow.mlflow_environment import MLFlowEnvironment\n",
"\n",
"with MLFlowEnvironment() as env:\n",
" env.tracking_uri\n",
" !mlflow ui --backend-store-uri {env.tracking_uri} --port 4997\n",
" print(env.tracking_uri)"
"from zenml.integrations.mlflow.mlflow_utils import get_tracking_uri\n",
"!mlflow ui --backend-store-uri=\"{get_tracking_uri()}\" --port=4997"
]
},
{
@@ -777,11 +784,8 @@
"# This will start a serving process for mlflow \n",
"# - if you want to continue in the notebook you need to manually\n",
"# interrupt the kernel \n",
"from zenml.environment import Environment\n",
"from zenml.integrations.mlflow.mlflow_environment import MLFlowEnvironment\n",
"\n",
"with MLFlowEnvironment() as env:\n",
" !mlflow ui --backend-store-uri {env.tracking_uri} --port 4997"
"from zenml.integrations.mlflow.mlflow_utils import get_tracking_uri\n",
"!mlflow ui --backend-store-uri=\"{get_tracking_uri()}\" --port=4997"
]
},
{
@@ -974,15 +978,6 @@
"y_test[0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!pip install matplotlib"
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -1189,7 +1184,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
"version": "3.8.13"
}
},
"nbformat": 4,
120 changes: 78 additions & 42 deletions 02 - Transitioning To Production.ipynb
@@ -97,7 +97,7 @@
"metadata": {},
"outputs": [],
"source": [
"!zenml integration install kubeflow seldon s3 -f"
"!zenml integration install kubeflow seldon s3 aws -f"
]
},
{
@@ -218,6 +218,7 @@
"source": [
"# Replace the following with your own configuration. Use the below as exemplary.\n",
"\n",
"KUBE_CONTEXT=\"zenml-eks\"\n",
"AWS_EKS_CLUSTER=\"zenhacks-cluster\"\n",
"AWS_REGION=\"us-east-1\"\n",
"ECR_REGISTRY_NAME=\"715803424590.dkr.ecr.us-east-1.amazonaws.com\"\n",
@@ -242,25 +243,7 @@
"!aws ecr get-login-password --region {AWS_REGION} | docker login --username AWS --password-stdin {ECR_REGISTRY_NAME}\n",
"\n",
"# Create a Kubernetes configuration context that points to the EKS cluster\n",
"!aws eks --region {AWS_REGION} update-kubeconfig --name {AWS_EKS_CLUSTER}"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Next, we need to set up a Kubernetes Secret to give Seldon Core access to the AWS S3 artifact store in the configured namespace.\n",
"\n",
"NOTE: this is based on the assumption that Seldon Core is running in an EKS cluster that already has IAM access enabled and doesn't need any explicit AWS credentials. If that is not the case, you will need to set up credentials differently. Please look up the variables relevant to your use-case in the official [Seldon Core documentation](https://docs.seldon.io/projects/seldon-core/en/latest/servers/overview.html#handling-credentials)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!kubectl -n {KUBEFLOW_NAMESPACE} create secret generic seldon-init-container-secret --from-literal=RCLONE_CONFIG_S3_PROVIDER='aws' --from-literal=RCLONE_CONFIG_S3_TYPE='s3' --from-literal=RCLONE_CONFIG_S3_ENV_AUTH=true"
"!aws eks --region {AWS_REGION} update-kubeconfig --name {AWS_EKS_CLUSTER} --alias {KUBE_CONTEXT}"
]
},
{
@@ -299,20 +282,20 @@
"!zenml container-registry register ecr_registry --type=default --uri={ECR_REGISTRY_NAME}\n",
"\n",
"# Register orchestrator (Kubeflow on AWS)\n",
"!zenml orchestrator register eks_orchestrator --type=kubeflow\n",
"!zenml orchestrator register eks_orchestrator --type=kubeflow --kubernetes_context={KUBE_CONTEXT} --synchronous=True\n",
"\n",
"# Register metadata store and artifact store\n",
"!zenml metadata-store register kubeflow_metadata_store --type=kubeflow\n",
"!zenml artifact-store register s3_store --type=s3 --path={S3_BUCKET_NAME}\n",
"\n",
"# Register the Seldon Core model deployer (Seldon on AWS)\n",
"!zenml model-deployer register eks_seldon --type=seldon --kubernetes_namespace={KUBEFLOW_NAMESPACE} --base_url=http://{INGRESS_HOST[0]}\n",
"!zenml model-deployer register eks_seldon --type=seldon --kubernetes_context={KUBE_CONTEXT} --kubernetes_namespace={KUBEFLOW_NAMESPACE} --base_url=http://{INGRESS_HOST[0]} --secret=s3_store\n",
"\n",
"# Register a secret manager\n",
"!zenml secrets-manager register local_secret_manager --type=local\n",
"!zenml secrets-manager register aws_secret_manager --type=aws\n",
"\n",
"# Register the aws_kubeflow_stack\n",
"!zenml stack register aws_kubeflow_stack -m kubeflow_metadata_store -a s3_store -o eks_orchestrator -c ecr_registry -d eks_seldon -x local_secret_manager"
"!zenml stack register aws_kubeflow_stack -m kubeflow_metadata_store -a s3_store -o eks_orchestrator -c ecr_registry -d eks_seldon -x aws_secret_manager"
]
},
{
@@ -331,23 +314,37 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Transition to Production (Run on the Cloud)\n",
"Next, we need to set up a ZenML Secret to give Seldon Core access to the AWS S3 artifact store in the configured namespace, by running the `zenml secret register` command.\n",
"\n",
"Once the stack is configured, all that is left to do is to set it active and to run a pipeline. Note that the code itself DOES NOT need to change, only the active stack.\n",
"NOTE: this is based on the assumption that Seldon Core is running in an EKS cluster that already has IAM access enabled and doesn't need any explicit AWS credentials. For more information on setting up ZenML secrets for Seldon Core, please see the [Managing Seldon Core Credentials](https://github.com/zenml-io/zenml/blob/main/examples/seldon_deployment/README.md#managing-seldon-core-credentials) section in our [Seldon Core Continuous Deployment Example](https://github.com/zenml-io/zenml/blob/main/examples/seldon_deployment/README.md).\n",
"\n",
"ZenML will detect that the stack has changed, and instead of running your pipeline locally, will build a Docker Image, push it to the container registry with your requirements, and deploy the pipeline with that image on Kubeflow Pipelines. This whole process is usually very painful but simplified with ZenML, and is completely customizable.\n",
"For the IAM access case, you can run this command to create the secret:\n",
"\n",
"For now, try it out! It might take a few minutes to build and push the image, but after that you'd see your pipeline in the cloud!"
"`zenml secret register -s seldon_s3 s3_store`\n",
"\n",
", and only set the `rclone_config_s3_env_auth` key to `True`. However, we cannot do this in the Jupyter Notebook, because interactive CLI commands are not supported, so we'll do it programmatically:"
]
},
{
"cell_type": "markdown",
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"<div class=\"alert alert-block alert-info\">\n",
" <b>Note:</b> Currently running pipelines defined within a jupyter notebook cell is\n",
" not supported. To get around this you can run the train pipeline within this repo. \n",
"</div>"
"from zenml.repository import Repository\n",
"from zenml.integrations.seldon.secret_schemas import SeldonS3SecretSchema\n",
"\n",
"secrets_manager = Repository().active_stack.secrets_manager\n",
"secret = SeldonS3SecretSchema(\n",
" name = \"s3_store\",\n",
" rclone_config_s3_env_auth = True\n",
")\n",
"try:\n",
" secrets_manager.get_secret(\"s3_store\")\n",
"except RuntimeError:\n",
" secrets_manager.register_secret(secret)\n",
"\n",
"!zenml secret get s3_store"
]
},
{
Expand All @@ -358,29 +355,49 @@
},
"outputs": [],
"source": [
"# Let's train within kubeflow pipelines - this will deploy the pipeline\n",
"!python run.py --deploy # --interval-second=300"
"!zenml stack up"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The output will indicate the URL that we can access to view Kubeflow pipelines locally (e.g. [http://localhost:8080/](http://localhost:8080/))."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"In order to see the pipeline run, you should port-forward Kubeflow Pipelines to: [http://localhost:8080/](http://localhost:8080/). You might want to try this is a seperate shell:\n",
"## Transition to Production (Run on the Cloud)\n",
"\n",
"Once the stack is configured, all that is left to do is to set it active and to run a pipeline. Note that the code itself DOES NOT need to change, only the active stack.\n",
"\n",
"ZenML will detect that the stack has changed, and instead of running your pipeline locally, will build a Docker Image, push it to the container registry with your requirements, and deploy the pipeline with that image on Kubeflow Pipelines. This whole process is usually very painful but simplified with ZenML, and is completely customizable.\n",
"\n",
"```\n",
"kubectl port-forward -n kubeflow svc/ml-pipeline-ui 8080:80\n",
"```"
"For now, try it out! It might take a few minutes to build and push the image, but after that you'd see your pipeline in the cloud!"
]
},
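As a quick sketch of that switch (using the stack name registered above), activating the cloud stack is a single CLI command; the pipeline code itself stays untouched:

```shell
# Make the AWS Kubeflow stack the active stack before triggering the run.
zenml stack set aws_kubeflow_stack
```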
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<div class=\"alert alert-block alert-info\">\n",
" <b>Note:</b> Currently running pipelines defined within a jupyter notebook cell is\n",
" not supported. To get around this you can run the train pipeline within this repo. \n",
"</div>"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# Do this only if the port forward from `zenml stack up` did not work. \n",
"!kubectl port-forward -n kubeflow svc/ml-pipeline-ui 8080:80"
"# Let's train within kubeflow pipelines - this will deploy the pipeline\n",
"!python run.py --deploy # --interval-second=300"
]
},
{
@@ -400,6 +417,25 @@
"\n",
"Next up, more about stacks, running pipelines on a schedule, and much more coming soon!"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Cleanup\n",
"\n",
"Once you are done running pipelines with the AWS stack, you can run the following command to stop the Seldon Core model server and the local daemons:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!zenml served-models delete <UUID-of-model-server>\n",
"!zenml stack down -f"
]
}
],
"metadata": {
2 changes: 2 additions & 0 deletions README.md
@@ -90,6 +90,8 @@ Once you are done running all notebooks you might want to stop all running proce


```shell
zenml stack set aws_kubeflow_stack
zenml stack down -f
zenml stack set local_kubeflow_stack
zenml stack down -f
```
9 changes: 6 additions & 3 deletions run.py
@@ -27,7 +27,8 @@
prediction_service_loader,
PredictionServiceLoaderStepConfig,
)
from steps.trainer import svc_trainer_mlflow # type: ignore [import]
from steps.trainer import svc_trainer_mlflow
from steps.mlflow_trainer import svc_trainer_mlflow as mlflow_svc_trainer_mlflow


from zenml.pipelines import Schedule
@@ -121,6 +122,7 @@ def main(
if deploy:

if use_seldon:
model_trainer_step = svc_trainer_mlflow
model_deployer_step = seldon_model_deployer_step(
config=SeldonDeployerStepConfig(
service_config=SeldonDeploymentConfig(
@@ -133,6 +135,7 @@
)
)
else:
model_trainer_step = mlflow_svc_trainer_mlflow
model_deployer_step = (
mlflow_model_deployer_step(
config=MLFlowDeployerConfig(workers=1, timeout=20)
Expand All @@ -142,7 +145,7 @@ def main(
# Initialize a continuous deployment pipeline run
deployment = continuous_deployment_pipeline(
importer=importer(),
trainer=svc_trainer_mlflow(),
trainer=model_trainer_step(),
evaluator=evaluator(),
# EvidentlyProfileStep takes reference_dataset and comparison dataset
get_reference_data=get_reference_data(),
@@ -188,7 +191,7 @@
service = services[0]
if service.is_running:
print(
f"The mode prediction server is running and accepts inference "
f"The model prediction server is running and accepts inference "
f"requests at:\n"
f" {service.prediction_url}\n"
f"To stop the service, run "
9 changes: 1 addition & 8 deletions steps/evaluator.py
@@ -1,14 +1,7 @@
import os

import mlflow # type: ignore [import]
import numpy as np # type: ignore [import]
from sklearn.base import ClassifierMixin

from zenml.integrations.mlflow.mlflow_step_decorator import enable_mlflow
from zenml.steps import step, Output, BaseStepConfig

# Define the step and enable MLflow (n.b. order of decorators is important here)

from zenml.steps import step

@step
def evaluator(
7 changes: 7 additions & 0 deletions steps/mlflow_trainer.py
@@ -0,0 +1,7 @@
from steps.trainer import svc_trainer_mlflow, tree_trainer_with_mlflow
from zenml.integrations.mlflow.mlflow_step_decorator import enable_mlflow


# These are the same steps, but with mlflow enabled
svc_trainer_mlflow = enable_mlflow(svc_trainer_mlflow)
tree_trainer_with_mlflow = enable_mlflow(tree_trainer_with_mlflow)
