Merge pull request #23 from zenml-io/bugfix/fix-stack-registration-and-nested-step-definitions

Fix stack registration and nested step definitions
zenml-io · Feb 2, 2023 · 67aa085 · 67aa085
2 parents 35f5631 + 7276ba9
commit 67aa085
Show file tree

Hide file tree

Showing 8 changed files with 93 additions and 96 deletions.
diff --git a/1-1_Pipelines.ipynb b/1-1_Pipelines.ipynb
@@ -182,7 +182,7 @@
     "    X_train: np.ndarray,\n",
     "    y_train: np.ndarray,\n",
     ") -> ClassifierMixin:\n",
-    "    \"\"\"Train a sklearn SVC classifier.\"\"\"\n",
+    "    \"\"\"Train an sklearn SVC classifier.\"\"\"\n",
     "    model = SVC(gamma=0.001)\n",
     "    model.fit(X_train, y_train)\n",
     "    return model\n",

diff --git a/1-2_Artifact_Lineage.ipynb b/1-2_Artifact_Lineage.ipynb
@@ -257,7 +257,7 @@
     "    X_train: np.ndarray,\n",
     "    y_train: np.ndarray,\n",
     ") -> ClassifierMixin:\n",
-    "    \"\"\"Train a sklearn decision tree classifier.\"\"\"\n",
+    "    \"\"\"Train an sklearn decision tree classifier.\"\"\"\n",
     "    model = DecisionTreeClassifier()\n",
     "    model.fit(X_train, y_train)\n",
     "    return model\n",

diff --git a/2-1_Experiment_Tracking.ipynb b/2-1_Experiment_Tracking.ipynb
@@ -116,8 +116,11 @@
     "# Register the MLflow experiment tracker\n",
     "!zenml experiment-tracker register mlflow_tracker --flavor=mlflow\n",
     "\n",
-    "# Add the MLflow experiment tracker into our default stack\n",
-    "!zenml stack update default -e mlflow_tracker"
+    "# Create a new stack that includes an MLflow experiment tracker\n",
+    "!zenml stack register mlflow_exp_tracker_stack -a default -o default -e mlflow_tracker\n",
+    "\n",
+    "# Set the new stack as active\n",
+    "!zenml stack set mlflow_exp_tracker_stack\n"
    ]
   },
   {
@@ -141,16 +144,15 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "metadata": {},
    "source": [
     "## Use MLflow in a ZenML Pipeline\n",
     "\n",
     "To integrate the MLflow experiment tracker into our previously defined ZenML pipeline, we only need to adjust the `svc_trainer` step. Let us define a new `svc_trainer_mlflow` step in which we use MLflow's [`mlflow.sklearn.autolog()`](https://www.mlflow.org/docs/latest/python_api/mlflow.sklearn.html#mlflow.sklearn.autolog) feature to automatically log all relevant attributes and metrics of our model to MLflow. \n",
     "\n",
-    "By adding an `experiment_tracker=mlflow_tracker` parameter in the `@step` decorator, ZenML automatically takes care of correctly initializing MLflow.\n",
-    "\n",
-    "The following function creates such a step, parametrized by the SVC hyperparameter `gamma`, then returns a corresponding ML pipeline. See the [sklearn docs](https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html) for more details on the SVC model and its hyperparameters."
+    "By adding an `experiment_tracker=\"mlflow_tracker\"` parameter in the `@step` decorator, ZenML automatically takes care of initializing MLflow."
    ]
   },
   {
@@ -165,24 +167,22 @@
     "from sklearn.svm import SVC\n",
     "from zenml.steps import step\n",
     "\n",
-    "\n",
-    "def build_svc_mlflow_pipeline(gamma=1e-3):\n",
-    "    @step(enable_cache=False, experiment_tracker=\"mlflow_tracker\")\n",
-    "    def svc_trainer_mlflow(\n",
-    "        X_train: np.ndarray,\n",
-    "        y_train: np.ndarray,\n",
-    "    ) -> ClassifierMixin:\n",
-    "        \"\"\"Train a sklearn SVC classifier and log to MLflow.\"\"\"\n",
-    "        mlflow.sklearn.autolog()  # log all model hparams and metrics to MLflow\n",
-    "        model = SVC(gamma=gamma)\n",
-    "        model.fit(X_train, y_train)\n",
-    "        return model\n",
-    "\n",
-    "    return digits_pipeline(\n",
-    "        importer=importer(),\n",
-    "        trainer=svc_trainer_mlflow(),\n",
-    "        evaluator=evaluator(),\n",
-    "    )"
+    "@step(enable_cache=False, experiment_tracker=\"mlflow_tracker\")\n",
+    "def svc_trainer_mlflow(\n",
+    "    X_train: np.ndarray,\n",
+    "    y_train: np.ndarray,\n",
+    ") -> ClassifierMixin:\n",
+    "    \"\"\"Train an sklearn SVC classifier and log to MLflow.\"\"\"\n",
+    "    mlflow.sklearn.autolog()  # log all model hparams and metrics to MLflow\n",
+    "    model = SVC(gamma=1e-3)\n",
+    "    model.fit(X_train, y_train)\n",
+    "    return model\n",
+    "\n",
+    "svc_mlflow_pipeline = digits_pipeline(\n",
+    "    importer=importer(),\n",
+    "    trainer=svc_trainer_mlflow(),\n",
+    "    evaluator=evaluator(),\n",
+    ")"
    ]
   },
   {
@@ -200,31 +200,30 @@
    "source": [
     "from sklearn.tree import DecisionTreeClassifier\n",
     "\n",
-    "\n",
-    "def build_tree_mlflow_pipeline():\n",
-    "    @step(enable_cache=False, experiment_tracker=\"mlflow_tracker\")\n",
-    "    def tree_trainer_with_mlflow(\n",
-    "        X_train: np.ndarray,\n",
-    "        y_train: np.ndarray,\n",
-    "    ) -> ClassifierMixin:\n",
-    "        \"\"\"Train a sklearn decision tree classifier and log to MLflow.\"\"\"\n",
-    "        mlflow.sklearn.autolog()  # log all model hparams and metrics to MLflow\n",
-    "        model = DecisionTreeClassifier()\n",
-    "        model.fit(X_train, y_train)\n",
-    "        return model\n",
-    "\n",
-    "    return digits_pipeline(\n",
-    "        importer=importer(),\n",
-    "        trainer=tree_trainer_with_mlflow(),\n",
-    "        evaluator=evaluator(),\n",
-    "    )"
+    "@step(enable_cache=False, experiment_tracker=\"mlflow_tracker\")\n",
+    "def tree_trainer_with_mlflow(\n",
+    "    X_train: np.ndarray,\n",
+    "    y_train: np.ndarray,\n",
+    ") -> ClassifierMixin:\n",
+    "    \"\"\"Train an sklearn decision tree classifier and log to MLflow.\"\"\"\n",
+    "    mlflow.sklearn.autolog()  # log all model hparams and metrics to MLflow\n",
+    "    model = DecisionTreeClassifier()\n",
+    "    model.fit(X_train, y_train)\n",
+    "    return model\n",
+    "\n",
+    "tree_mlflow_pipeline = digits_pipeline(\n",
+    "    importer=importer(),\n",
+    "    trainer=tree_trainer_with_mlflow(),\n",
+    "    evaluator=evaluator(),\n",
+    ")"
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "And that's it, we're all set up! Now all `pipeline.run()` calls will automatically log all hyperparameters and metrics to MLflow. Let's try it out and do a few pipeline runs with different `gamma` values:"
+    "And that's it, we're all set up! Now all `pipeline.run()` calls will automatically log all hyperparameters and metrics to MLflow. Let's try it out:"
    ]
   },
   {
@@ -233,9 +232,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "for gamma in (0.0001, 0.001, 0.01, 0.1):\n",
-    "    build_svc_mlflow_pipeline(gamma=gamma).run(unlisted=True)\n",
-    "build_tree_mlflow_pipeline().run(unlisted=True)"
+    "svc_mlflow_pipeline.run(unlisted=True)\n",
+    "tree_mlflow_pipeline.run(unlisted=True)"
    ]
   },
   {
@@ -321,10 +319,10 @@
     "# Register the W&B experiment tracker\n",
     "!zenml experiment-tracker register wandb_tracker --flavor=wandb --api_key={WANDB_API_KEY} --entity={WANDB_ENTITY} --project_name={WANDB_PROJECT}\n",
     "\n",
-    "# Create a new MLOps stack with W&B experiment tracker in it\n",
+    "# Create a new stack that includes a W&B experiment tracker\n",
     "!zenml stack register wandb_stack -a default -o default -e wandb_tracker\n",
     "\n",
-    "# Set the wandb_stack as the active stack\n",
+    "# Set the new stack as active\n",
     "!zenml stack set wandb_stack"
    ]
   },
@@ -357,35 +355,35 @@
     "from steps.importer import importer\n",
     "\n",
     "\n",
-    "def build_svc_wandb_pipeline(gamma=1e-3):\n",
-    "    @step(enable_cache=False, experiment_tracker=\"wandb_tracker\")\n",
-    "    def svc_trainer_wandb(\n",
-    "        X_train: np.ndarray,\n",
-    "        y_train: np.ndarray,\n",
-    "    ) -> ClassifierMixin:\n",
-    "        \"\"\"Train a sklearn SVC classifier and log to W&B.\"\"\"\n",
-    "        wandb.log({\"gamma\": gamma})  # log gamma hparam to wandb\n",
-    "        model = SVC(gamma=gamma)\n",
-    "        model.fit(X_train, y_train)\n",
-    "        return model\n",
-    "\n",
-    "    @step(enable_cache=False, experiment_tracker=\"wandb_tracker\")\n",
-    "    def evaluator_wandb(\n",
-    "        X_test: np.ndarray,\n",
-    "        y_test: np.ndarray,\n",
-    "        model: ClassifierMixin,\n",
-    "    ) -> float:\n",
-    "        \"\"\"Calculate the accuracy on the test set and log to W&B.\"\"\"\n",
-    "        test_acc = model.score(X_test, y_test)\n",
-    "        wandb.log({\"test acc\": test_acc})  # log test_acc to wandb\n",
-    "        print(f\"Test accuracy: {test_acc}\")\n",
-    "        return test_acc\n",
-    "\n",
-    "    return digits_pipeline(\n",
-    "        importer=importer(),\n",
-    "        trainer=svc_trainer_wandb(),\n",
-    "        evaluator=evaluator_wandb(),\n",
-    "    )"
+    "@step(enable_cache=False, experiment_tracker=\"wandb_tracker\")\n",
+    "def svc_trainer_wandb(\n",
+    "    X_train: np.ndarray,\n",
+    "    y_train: np.ndarray,\n",
+    ") -> ClassifierMixin:\n",
+    "    \"\"\"Train an sklearn SVC classifier and log to W&B.\"\"\"\n",
+    "    gamma = 1e-3\n",
+    "    wandb.log({\"gamma\": gamma})  # log gamma hparam to wandb\n",
+    "    model = SVC(gamma=gamma)\n",
+    "    model.fit(X_train, y_train)\n",
+    "    return model\n",
+    "\n",
+    "@step(enable_cache=False, experiment_tracker=\"wandb_tracker\")\n",
+    "def evaluator_wandb(\n",
+    "    X_test: np.ndarray,\n",
+    "    y_test: np.ndarray,\n",
+    "    model: ClassifierMixin,\n",
+    ") -> float:\n",
+    "    \"\"\"Calculate the accuracy on the test set and log to W&B.\"\"\"\n",
+    "    test_acc = model.score(X_test, y_test)\n",
+    "    wandb.log({\"test acc\": test_acc})  # log test_acc to wandb\n",
+    "    print(f\"Test accuracy: {test_acc}\")\n",
+    "    return test_acc\n",
+    "\n",
+    "svc_wandb_pipeline = digits_pipeline(\n",
+    "    importer=importer(),\n",
+    "    trainer=svc_trainer_wandb(),\n",
+    "    evaluator=evaluator_wandb(),\n",
+    ")"
    ]
   },
   {
@@ -401,8 +399,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "for gamma in (1e-4, 1e-3, 1e-2, 1e-1):\n",
-    "    build_svc_wandb_pipeline(gamma=gamma).run(unlisted=True)\n",
+    "svc_wandb_pipeline.run(unlisted=True)\n",
     "\n",
     "print(f\"https://wandb.ai/{WANDB_ENTITY}/{WANDB_PROJECT}/runs/\")"
    ]
@@ -424,7 +421,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3.8.13 64-bit ('zenbytes-dev')",
+   "display_name": "zenml310",
    "language": "python",
    "name": "python3"
   },
@@ -438,11 +435,11 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.13"
+   "version": "3.10.0 (default, Nov 29 2022, 17:00:25) [Clang 14.0.0 (clang-1400.0.29.202)]"
   },
   "vscode": {
    "interpreter": {
-    "hash": "ec45946565c50b1d690aa5a9e3c974f5b62b9cc8d8934e441e52186140f79402"
+    "hash": "569b3361e3ec4d7692543ddda480ca8173a6c158bb706498f2e35ca1687a80ea"
    }
   }
  },

diff --git a/2-2_Local_Deployment.ipynb b/2-2_Local_Deployment.ipynb
@@ -132,17 +132,17 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Change back to our default MLOps stack (in case W&B stack is still active)\n",
-    "!zenml stack set default\n",
-    "\n",
     "# Define MLflow experiment tracker from last lesson\n",
     "!zenml experiment-tracker register mlflow_tracker --flavor=mlflow\n",
     "\n",
     "# Register the MLflow model deployer\n",
     "!zenml model-deployer register mlflow --flavor=mlflow\n",
     "\n",
-    "# Add the MLflow components into our default stack\n",
-    "!zenml stack update default -d mlflow -e mlflow_tracker"
+    "# Create a new stack with MLflow components\n",
+    "!zenml stack register mlflow_stack -a default -o default -d mlflow -e mlflow_tracker\n",
+    "\n",
+    "# Set the new stack as active\n",
+    "!zenml stack set mlflow_stack"
    ]
   },
   {

diff --git a/2-3_Inference_Pipelines.ipynb b/2-3_Inference_Pipelines.ipynb
@@ -42,10 +42,10 @@
     "!zenml integration install sklearn mlflow -y\n",
     "!rm -rf .zen\n",
     "!zenml init\n",
-    "!zenml stack set default\n",
     "!zenml experiment-tracker register mlflow_tracker --flavor=mlflow\n",
     "!zenml model-deployer register mlflow --flavor=mlflow\n",
-    "!zenml stack update default -d mlflow -e mlflow_tracker\n",
+    "!zenml stack register mlflow_stack -a default -o default -d mlflow -e mlflow_tracker\n",
+    "!zenml stack set mlflow_stack\n",
     "\n",
     "%pip install pyparsing==2.4.2  # required for Colab\n",
     "\n",

diff --git a/3-1_Data_Skew.ipynb b/3-1_Data_Skew.ipynb
@@ -186,8 +186,8 @@
    "outputs": [],
    "source": [
     "!zenml data-validator register evidently_validator --flavor=evidently\n",
-    "\n",
-    "!zenml stack update default -dv evidently_validator"
+    "!zenml stack register evidently_stack -a default -o default -dv evidently_validator\n",
+    "!zenml stack set evidently_stack"
    ]
   },
   {

diff --git a/steps/mlflow_trainer.py b/steps/mlflow_trainer.py
@@ -11,7 +11,7 @@ def svc_trainer_mlflow(
     X_train: np.ndarray,
     y_train: np.ndarray,
 ) -> ClassifierMixin:
-    """Train a sklearn SVC classifier and log to MLflow."""
+    """Train an sklearn SVC classifier and log to MLflow."""
     mlflow.sklearn.autolog()  # log all model hparams and metrics to MLflow
     model = SVC(gamma=0.001)
     model.fit(X_train, y_train)
@@ -23,7 +23,7 @@ def tree_trainer_with_mlflow(
     X_train: np.ndarray,
     y_train: np.ndarray,
 ) -> ClassifierMixin:
-    """Train a sklearn decision tree classifier and log to MLflow."""
+    """Train an sklearn decision tree classifier and log to MLflow."""
     mlflow.sklearn.autolog()  # log all model hparams and metrics to MLflow
     model = DecisionTreeClassifier()
     model.fit(X_train, y_train)

diff --git a/steps/sklearn_trainer.py b/steps/sklearn_trainer.py
@@ -12,7 +12,7 @@ def svc_trainer(
     X_train: np.ndarray,
     y_train: np.ndarray,
 ) -> ClassifierMixin:
-    """Train a sklearn SVC classifier."""
+    """Train an sklearn SVC classifier."""
     model = SVC(gamma=0.001)
     model.fit(X_train, y_train)
     return model
@@ -23,7 +23,7 @@ def tree_trainer(
     X_train: np.ndarray,
     y_train: np.ndarray,
 ) -> ClassifierMixin:
-    """Train a sklearn decision tree classifier."""
+    """Train an sklearn decision tree classifier."""
     model = DecisionTreeClassifier()
     model.fit(X_train, y_train)
     return model