Skip to content
This repository has been archived by the owner on Jan 29, 2024. It is now read-only.

Commit

Permalink
Merge pull request #23 from zenml-io/bugfix/fix-stack-registration-an…
Browse files Browse the repository at this point in the history
…d-nested-step-definitions

Fix stack registration and nested step definitions
  • Loading branch information
fa9r authored Feb 2, 2023
2 parents 35f5631 + 7276ba9 commit 67aa085
Show file tree
Hide file tree
Showing 8 changed files with 93 additions and 96 deletions.
2 changes: 1 addition & 1 deletion 1-1_Pipelines.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@
" X_train: np.ndarray,\n",
" y_train: np.ndarray,\n",
") -> ClassifierMixin:\n",
" \"\"\"Train a sklearn SVC classifier.\"\"\"\n",
" \"\"\"Train an sklearn SVC classifier.\"\"\"\n",
" model = SVC(gamma=0.001)\n",
" model.fit(X_train, y_train)\n",
" return model\n",
Expand Down
2 changes: 1 addition & 1 deletion 1-2_Artifact_Lineage.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,7 @@
" X_train: np.ndarray,\n",
" y_train: np.ndarray,\n",
") -> ClassifierMixin:\n",
" \"\"\"Train a sklearn decision tree classifier.\"\"\"\n",
" \"\"\"Train an sklearn decision tree classifier.\"\"\"\n",
" model = DecisionTreeClassifier()\n",
" model.fit(X_train, y_train)\n",
" return model\n",
Expand Down
159 changes: 78 additions & 81 deletions 2-1_Experiment_Tracking.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -116,8 +116,11 @@
"# Register the MLflow experiment tracker\n",
"!zenml experiment-tracker register mlflow_tracker --flavor=mlflow\n",
"\n",
"# Add the MLflow experiment tracker into our default stack\n",
"!zenml stack update default -e mlflow_tracker"
"# Create a new stack that includes an MLflow experiment tracker\n",
"!zenml stack register mlflow_exp_tracker_stack -a default -o default -e mlflow_tracker\n",
"\n",
"# Set the new stack as active\n",
"!zenml stack set mlflow_exp_tracker_stack\n"
]
},
{
Expand All @@ -141,16 +144,15 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Use MLflow in a ZenML Pipeline\n",
"\n",
"To integrate the MLflow experiment tracker into our previously defined ZenML pipeline, we only need to adjust the `svc_trainer` step. Let us define a new `svc_trainer_mlflow` step in which we use MLflow's [`mlflow.sklearn.autolog()`](https://www.mlflow.org/docs/latest/python_api/mlflow.sklearn.html#mlflow.sklearn.autolog) feature to automatically log all relevant attributes and metrics of our model to MLflow. \n",
"\n",
"By adding an `experiment_tracker=mlflow_tracker` parameter in the `@step` decorator, ZenML automatically takes care of correctly initializing MLflow.\n",
"\n",
"The following function creates such a step, parametrized by the SVC hyperparameter `gamma`, then returns a corresponding ML pipeline. See the [sklearn docs](https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html) for more details on the SVC model and its hyperparameters."
"By adding an `experiment_tracker=\"mlflow_tracker\"` parameter in the `@step` decorator, ZenML automatically takes care of initializing MLflow."
]
},
{
Expand All @@ -165,24 +167,22 @@
"from sklearn.svm import SVC\n",
"from zenml.steps import step\n",
"\n",
"\n",
"def build_svc_mlflow_pipeline(gamma=1e-3):\n",
" @step(enable_cache=False, experiment_tracker=\"mlflow_tracker\")\n",
" def svc_trainer_mlflow(\n",
" X_train: np.ndarray,\n",
" y_train: np.ndarray,\n",
" ) -> ClassifierMixin:\n",
" \"\"\"Train a sklearn SVC classifier and log to MLflow.\"\"\"\n",
" mlflow.sklearn.autolog() # log all model hparams and metrics to MLflow\n",
" model = SVC(gamma=gamma)\n",
" model.fit(X_train, y_train)\n",
" return model\n",
"\n",
" return digits_pipeline(\n",
" importer=importer(),\n",
" trainer=svc_trainer_mlflow(),\n",
" evaluator=evaluator(),\n",
" )"
"@step(enable_cache=False, experiment_tracker=\"mlflow_tracker\")\n",
"def svc_trainer_mlflow(\n",
" X_train: np.ndarray,\n",
" y_train: np.ndarray,\n",
") -> ClassifierMixin:\n",
" \"\"\"Train an sklearn SVC classifier and log to MLflow.\"\"\"\n",
" mlflow.sklearn.autolog() # log all model hparams and metrics to MLflow\n",
" model = SVC(gamma=1e-3)\n",
" model.fit(X_train, y_train)\n",
" return model\n",
"\n",
"svc_mlflow_pipeline = digits_pipeline(\n",
" importer=importer(),\n",
" trainer=svc_trainer_mlflow(),\n",
" evaluator=evaluator(),\n",
")"
]
},
{
Expand All @@ -200,31 +200,30 @@
"source": [
"from sklearn.tree import DecisionTreeClassifier\n",
"\n",
"\n",
"def build_tree_mlflow_pipeline():\n",
" @step(enable_cache=False, experiment_tracker=\"mlflow_tracker\")\n",
" def tree_trainer_with_mlflow(\n",
" X_train: np.ndarray,\n",
" y_train: np.ndarray,\n",
" ) -> ClassifierMixin:\n",
" \"\"\"Train a sklearn decision tree classifier and log to MLflow.\"\"\"\n",
" mlflow.sklearn.autolog() # log all model hparams and metrics to MLflow\n",
" model = DecisionTreeClassifier()\n",
" model.fit(X_train, y_train)\n",
" return model\n",
"\n",
" return digits_pipeline(\n",
" importer=importer(),\n",
" trainer=tree_trainer_with_mlflow(),\n",
" evaluator=evaluator(),\n",
" )"
"@step(enable_cache=False, experiment_tracker=\"mlflow_tracker\")\n",
"def tree_trainer_with_mlflow(\n",
" X_train: np.ndarray,\n",
" y_train: np.ndarray,\n",
") -> ClassifierMixin:\n",
" \"\"\"Train an sklearn decision tree classifier and log to MLflow.\"\"\"\n",
" mlflow.sklearn.autolog() # log all model hparams and metrics to MLflow\n",
" model = DecisionTreeClassifier()\n",
" model.fit(X_train, y_train)\n",
" return model\n",
"\n",
"tree_mlflow_pipeline = digits_pipeline(\n",
" importer=importer(),\n",
" trainer=tree_trainer_with_mlflow(),\n",
" evaluator=evaluator(),\n",
")"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"And that's it, we're all set up! Now all `pipeline.run()` calls will automatically log all hyperparameters and metrics to MLflow. Let's try it out and do a few pipeline runs with different `gamma` values:"
"And that's it, we're all set up! Now all `pipeline.run()` calls will automatically log all hyperparameters and metrics to MLflow. Let's try it out:"
]
},
{
Expand All @@ -233,9 +232,8 @@
"metadata": {},
"outputs": [],
"source": [
"for gamma in (0.0001, 0.001, 0.01, 0.1):\n",
" build_svc_mlflow_pipeline(gamma=gamma).run(unlisted=True)\n",
"build_tree_mlflow_pipeline().run(unlisted=True)"
"svc_mlflow_pipeline.run(unlisted=True)\n",
"tree_mlflow_pipeline.run(unlisted=True)"
]
},
{
Expand Down Expand Up @@ -321,10 +319,10 @@
"# Register the W&B experiment tracker\n",
"!zenml experiment-tracker register wandb_tracker --flavor=wandb --api_key={WANDB_API_KEY} --entity={WANDB_ENTITY} --project_name={WANDB_PROJECT}\n",
"\n",
"# Create a new MLOps stack with W&B experiment tracker in it\n",
"# Create a new stack that includes a W&B experiment tracker\n",
"!zenml stack register wandb_stack -a default -o default -e wandb_tracker\n",
"\n",
"# Set the wandb_stack as the active stack\n",
"# Set the new stack as active\n",
"!zenml stack set wandb_stack"
]
},
Expand Down Expand Up @@ -357,35 +355,35 @@
"from steps.importer import importer\n",
"\n",
"\n",
"def build_svc_wandb_pipeline(gamma=1e-3):\n",
" @step(enable_cache=False, experiment_tracker=\"wandb_tracker\")\n",
" def svc_trainer_wandb(\n",
" X_train: np.ndarray,\n",
" y_train: np.ndarray,\n",
" ) -> ClassifierMixin:\n",
" \"\"\"Train a sklearn SVC classifier and log to W&B.\"\"\"\n",
" wandb.log({\"gamma\": gamma}) # log gamma hparam to wandb\n",
" model = SVC(gamma=gamma)\n",
" model.fit(X_train, y_train)\n",
" return model\n",
"\n",
" @step(enable_cache=False, experiment_tracker=\"wandb_tracker\")\n",
" def evaluator_wandb(\n",
" X_test: np.ndarray,\n",
" y_test: np.ndarray,\n",
" model: ClassifierMixin,\n",
" ) -> float:\n",
" \"\"\"Calculate the accuracy on the test set and log to W&B.\"\"\"\n",
" test_acc = model.score(X_test, y_test)\n",
" wandb.log({\"test acc\": test_acc}) # log test_acc to wandb\n",
" print(f\"Test accuracy: {test_acc}\")\n",
" return test_acc\n",
"\n",
" return digits_pipeline(\n",
" importer=importer(),\n",
" trainer=svc_trainer_wandb(),\n",
" evaluator=evaluator_wandb(),\n",
" )"
"@step(enable_cache=False, experiment_tracker=\"wandb_tracker\")\n",
"def svc_trainer_wandb(\n",
" X_train: np.ndarray,\n",
" y_train: np.ndarray,\n",
") -> ClassifierMixin:\n",
" \"\"\"Train an sklearn SVC classifier and log to W&B.\"\"\"\n",
" gamma = 1e-3\n",
" wandb.log({\"gamma\": gamma}) # log gamma hparam to wandb\n",
" model = SVC(gamma=gamma)\n",
" model.fit(X_train, y_train)\n",
" return model\n",
"\n",
"@step(enable_cache=False, experiment_tracker=\"wandb_tracker\")\n",
"def evaluator_wandb(\n",
" X_test: np.ndarray,\n",
" y_test: np.ndarray,\n",
" model: ClassifierMixin,\n",
") -> float:\n",
" \"\"\"Calculate the accuracy on the test set and log to W&B.\"\"\"\n",
" test_acc = model.score(X_test, y_test)\n",
" wandb.log({\"test acc\": test_acc}) # log test_acc to wandb\n",
" print(f\"Test accuracy: {test_acc}\")\n",
" return test_acc\n",
"\n",
"svc_wandb_pipeline = digits_pipeline(\n",
" importer=importer(),\n",
" trainer=svc_trainer_wandb(),\n",
" evaluator=evaluator_wandb(),\n",
")"
]
},
{
Expand All @@ -401,8 +399,7 @@
"metadata": {},
"outputs": [],
"source": [
"for gamma in (1e-4, 1e-3, 1e-2, 1e-1):\n",
" build_svc_wandb_pipeline(gamma=gamma).run(unlisted=True)\n",
"svc_wandb_pipeline.run(unlisted=True)\n",
"\n",
"print(f\"https://wandb.ai/{WANDB_ENTITY}/{WANDB_PROJECT}/runs/\")"
]
Expand All @@ -424,7 +421,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.8.13 64-bit ('zenbytes-dev')",
"display_name": "zenml310",
"language": "python",
"name": "python3"
},
Expand All @@ -438,11 +435,11 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.13"
"version": "3.10.0 (default, Nov 29 2022, 17:00:25) [Clang 14.0.0 (clang-1400.0.29.202)]"
},
"vscode": {
"interpreter": {
"hash": "ec45946565c50b1d690aa5a9e3c974f5b62b9cc8d8934e441e52186140f79402"
"hash": "569b3361e3ec4d7692543ddda480ca8173a6c158bb706498f2e35ca1687a80ea"
}
}
},
Expand Down
10 changes: 5 additions & 5 deletions 2-2_Local_Deployment.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -132,17 +132,17 @@
"metadata": {},
"outputs": [],
"source": [
"# Change back to our default MLOps stack (in case W&B stack is still active)\n",
"!zenml stack set default\n",
"\n",
"# Register the MLflow experiment tracker from the last lesson\n",
"!zenml experiment-tracker register mlflow_tracker --flavor=mlflow\n",
"\n",
"# Register the MLflow model deployer\n",
"!zenml model-deployer register mlflow --flavor=mlflow\n",
"\n",
"# Add the MLflow components into our default stack\n",
"!zenml stack update default -d mlflow -e mlflow_tracker"
"# Create a new stack with MLflow components\n",
"!zenml stack register mlflow_stack -a default -o default -d mlflow -e mlflow_tracker\n",
"\n",
"# Set the new stack as active\n",
"!zenml stack set mlflow_stack"
]
},
{
Expand Down
4 changes: 2 additions & 2 deletions 2-3_Inference_Pipelines.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,10 @@
"!zenml integration install sklearn mlflow -y\n",
"!rm -rf .zen\n",
"!zenml init\n",
"!zenml stack set default\n",
"!zenml experiment-tracker register mlflow_tracker --flavor=mlflow\n",
"!zenml model-deployer register mlflow --flavor=mlflow\n",
"!zenml stack update default -d mlflow -e mlflow_tracker\n",
"!zenml stack register mlflow_stack -a default -o default -d mlflow -e mlflow_tracker\n",
"!zenml stack set mlflow_stack\n",
"\n",
"%pip install pyparsing==2.4.2 # required for Colab\n",
"\n",
Expand Down
4 changes: 2 additions & 2 deletions 3-1_Data_Skew.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -186,8 +186,8 @@
"outputs": [],
"source": [
"!zenml data-validator register evidently_validator --flavor=evidently\n",
"\n",
"!zenml stack update default -dv evidently_validator"
"!zenml stack register evidently_stack -a default -o default -dv evidently_validator\n",
"!zenml stack set evidently_stack"
]
},
{
Expand Down
4 changes: 2 additions & 2 deletions steps/mlflow_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def svc_trainer_mlflow(
X_train: np.ndarray,
y_train: np.ndarray,
) -> ClassifierMixin:
"""Train a sklearn SVC classifier and log to MLflow."""
"""Train an sklearn SVC classifier and log to MLflow."""
mlflow.sklearn.autolog() # log all model hparams and metrics to MLflow
model = SVC(gamma=0.001)
model.fit(X_train, y_train)
Expand All @@ -23,7 +23,7 @@ def tree_trainer_with_mlflow(
X_train: np.ndarray,
y_train: np.ndarray,
) -> ClassifierMixin:
"""Train a sklearn decision tree classifier and log to MLflow."""
"""Train an sklearn decision tree classifier and log to MLflow."""
mlflow.sklearn.autolog() # log all model hparams and metrics to MLflow
model = DecisionTreeClassifier()
model.fit(X_train, y_train)
Expand Down
4 changes: 2 additions & 2 deletions steps/sklearn_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ def svc_trainer(
X_train: np.ndarray,
y_train: np.ndarray,
) -> ClassifierMixin:
"""Train a sklearn SVC classifier."""
"""Train an sklearn SVC classifier."""
model = SVC(gamma=0.001)
model.fit(X_train, y_train)
return model
Expand All @@ -23,7 +23,7 @@ def tree_trainer(
X_train: np.ndarray,
y_train: np.ndarray,
) -> ClassifierMixin:
"""Train a sklearn decision tree classifier."""
"""Train an sklearn decision tree classifier."""
model = DecisionTreeClassifier()
model.fit(X_train, y_train)
return model

0 comments on commit 67aa085

Please sign in to comment.