diff --git a/intents/intents.ipynb b/intents/intents.ipynb
index 993b285..6c022b7 100644
--- a/intents/intents.ipynb
+++ b/intents/intents.ipynb
@@ -8,7 +8,7 @@
"source": [
"# Isidro Intent Classification (BERT-Based Transfer Learning)\n",
"\n",
- "This notebook fine-tunes BERT to perform intent classification for Isidro.\n",
+ "This notebook fine-tunes BERT to perform intent classification for Isidro, and compares that model's performance to AutoML and BQML models.\n",
"\n",
"Adapted from Prof. Dr. Johannes Maucher's [Intent Classification with BERT](https://hannibunny.github.io/mlbook/transformer/intent_classification_with_bert.html)"
]
@@ -32,7 +32,21 @@
"id": "SCjmX4zTCkRK"
},
"source": [
- "## Setup\n"
+ "## Setup"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### __Configuration__"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The notebook depends on pre-configured authentication to Google Cloud Platform. For example, you could use a Vertex AI Workbench notebook with a sufficiently credentialed service account. Otherwise, you need to add Google Cloud authentication yourself before running the cells below."
]
},
{
@@ -41,12 +55,55 @@
"metadata": {},
"outputs": [],
"source": [
- "DATA_FILE = \"quality.csv\""
+ "# CHANGE THESE\n",
+ "\n",
+ "# The project should already exist and have Vertex/GCS/BigQuery APIs enabled\n",
+ "# A \"playground\" project is recommended for isolation and easy teardown\n",
+ "GCP_PROJECT = \"example\"\n",
+ "\n",
+ "# Name only - bucket will be created in the notebook\n",
+ "GCP_BUCKET = \"isidro_intent_classification\""
]
},
{
"cell_type": "code",
"execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# OPTIONALLY, CHANGE THESE\n",
+ "DATA_FILE = \"quality.csv\"\n",
+ "GCP_REGION = \"us-central1\"\n",
+ "VERTEX_MODEL_NAME_PREFIX = \"isidro_intents\"\n",
+ "VERTEX_MODEL_ROUND = \"r1\" # suffix for differentiating separate models (e.g., when running this notebook multiple times)\n",
+ "VERTEX_MODEL_DESCRIPTION = \"Isidro intent classification model\"\n",
+ "EXPERIMENT_NAME = \"isidro-intents-compare-custom-automl-bqml\"\n",
+ "BQ_DATASET = \"isidro_intents\" # name only - dataset will be created in the notebook"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# TUNE THESE ON LATER ITERATIONS\n",
+ "EPOCHS = 8 # custom tensorflow model epochs\n",
+ "LEARNING_RATE = 1e-5 # custom tensorflow model learning rate (Adam optimizer)\n",
+ "BATCH_SIZE = 32 # custom tensorflow model batch size\n",
+ "BERT_MODEL_NAME = 'small_bert/bert_en_uncased_L-8_H-512_A-8' # base model for transfer learning"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### __Dependencies__"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
"metadata": {
"id": "q-YbjCkzw0yU",
"tags": []
@@ -57,55 +114,70 @@
"output_type": "stream",
"text": [
"Requirement already satisfied: flask==2.2.2 in /opt/conda/lib/python3.7/site-packages (from -r service/requirements.txt (line 1)) (2.2.2)\n",
- "Requirement already satisfied: gunicorn==20.1.0 in /opt/conda/lib/python3.7/site-packages (from -r service/requirements.txt (line 2)) (20.1.0)\n",
- "Requirement already satisfied: tensorflow==2.8.4 in /opt/conda/lib/python3.7/site-packages (from -r service/requirements.txt (line 3)) (2.8.4)\n",
- "Requirement already satisfied: tensorflow_hub==0.12.0 in /opt/conda/lib/python3.7/site-packages (from -r service/requirements.txt (line 4)) (0.12.0)\n",
- "Requirement already satisfied: tensorflow_text==2.8.2 in /opt/conda/lib/python3.7/site-packages (from -r service/requirements.txt (line 5)) (2.8.2)\n",
- "Requirement already satisfied: Jinja2>=3.0 in /opt/conda/lib/python3.7/site-packages (from flask==2.2.2->-r service/requirements.txt (line 1)) (3.1.2)\n",
- "Requirement already satisfied: itsdangerous>=2.0 in /opt/conda/lib/python3.7/site-packages (from flask==2.2.2->-r service/requirements.txt (line 1)) (2.1.2)\n",
+ "Requirement already satisfied: google-cloud-aiplatform==1.18.3 in /opt/conda/lib/python3.7/site-packages (from -r service/requirements.txt (line 2)) (1.18.3)\n",
+ "Requirement already satisfied: gunicorn==20.1.0 in /opt/conda/lib/python3.7/site-packages (from -r service/requirements.txt (line 3)) (20.1.0)\n",
+ "Requirement already satisfied: tensorflow==2.8.4 in /opt/conda/lib/python3.7/site-packages (from -r service/requirements.txt (line 4)) (2.8.4)\n",
+ "Requirement already satisfied: tensorflow_hub==0.12.0 in /opt/conda/lib/python3.7/site-packages (from -r service/requirements.txt (line 5)) (0.12.0)\n",
+ "Requirement already satisfied: tensorflow_text==2.8.2 in /opt/conda/lib/python3.7/site-packages (from -r service/requirements.txt (line 6)) (2.8.2)\n",
"Requirement already satisfied: importlib-metadata>=3.6.0 in /opt/conda/lib/python3.7/site-packages (from flask==2.2.2->-r service/requirements.txt (line 1)) (5.1.0)\n",
- "Requirement already satisfied: click>=8.0 in /opt/conda/lib/python3.7/site-packages (from flask==2.2.2->-r service/requirements.txt (line 1)) (8.1.3)\n",
"Requirement already satisfied: Werkzeug>=2.2.2 in /opt/conda/lib/python3.7/site-packages (from flask==2.2.2->-r service/requirements.txt (line 1)) (2.2.2)\n",
- "Requirement already satisfied: setuptools>=3.0 in /opt/conda/lib/python3.7/site-packages (from gunicorn==20.1.0->-r service/requirements.txt (line 2)) (65.5.1)\n",
- "Requirement already satisfied: numpy>=1.20 in /opt/conda/lib/python3.7/site-packages (from tensorflow==2.8.4->-r service/requirements.txt (line 3)) (1.21.6)\n",
- "Requirement already satisfied: typing-extensions>=3.6.6 in /opt/conda/lib/python3.7/site-packages (from tensorflow==2.8.4->-r service/requirements.txt (line 3)) (4.4.0)\n",
- "Requirement already satisfied: wrapt>=1.11.0 in /opt/conda/lib/python3.7/site-packages (from tensorflow==2.8.4->-r service/requirements.txt (line 3)) (1.14.1)\n",
- "Requirement already satisfied: keras<2.9,>=2.8.0rc0 in /opt/conda/lib/python3.7/site-packages (from tensorflow==2.8.4->-r service/requirements.txt (line 3)) (2.8.0)\n",
- "Requirement already satisfied: flatbuffers>=1.12 in /opt/conda/lib/python3.7/site-packages (from tensorflow==2.8.4->-r service/requirements.txt (line 3)) (22.12.6)\n",
- "Requirement already satisfied: gast>=0.2.1 in /opt/conda/lib/python3.7/site-packages (from tensorflow==2.8.4->-r service/requirements.txt (line 3)) (0.5.3)\n",
- "Requirement already satisfied: termcolor>=1.1.0 in /opt/conda/lib/python3.7/site-packages (from tensorflow==2.8.4->-r service/requirements.txt (line 3)) (2.1.1)\n",
- "Requirement already satisfied: tensorflow-estimator<2.9,>=2.8 in /opt/conda/lib/python3.7/site-packages (from tensorflow==2.8.4->-r service/requirements.txt (line 3)) (2.8.0)\n",
- "Requirement already satisfied: grpcio<2.0,>=1.24.3 in /opt/conda/lib/python3.7/site-packages (from tensorflow==2.8.4->-r service/requirements.txt (line 3)) (1.51.1)\n",
- "Requirement already satisfied: tensorboard<2.9,>=2.8 in /opt/conda/lib/python3.7/site-packages (from tensorflow==2.8.4->-r service/requirements.txt (line 3)) (2.8.0)\n",
- "Requirement already satisfied: tensorflow-io-gcs-filesystem>=0.23.1 in /opt/conda/lib/python3.7/site-packages (from tensorflow==2.8.4->-r service/requirements.txt (line 3)) (0.28.0)\n",
- "Requirement already satisfied: google-pasta>=0.1.1 in /opt/conda/lib/python3.7/site-packages (from tensorflow==2.8.4->-r service/requirements.txt (line 3)) (0.2.0)\n",
- "Requirement already satisfied: h5py>=2.9.0 in /opt/conda/lib/python3.7/site-packages (from tensorflow==2.8.4->-r service/requirements.txt (line 3)) (3.7.0)\n",
- "Requirement already satisfied: keras-preprocessing>=1.1.1 in /opt/conda/lib/python3.7/site-packages (from tensorflow==2.8.4->-r service/requirements.txt (line 3)) (1.1.2)\n",
- "Requirement already satisfied: astunparse>=1.6.0 in /opt/conda/lib/python3.7/site-packages (from tensorflow==2.8.4->-r service/requirements.txt (line 3)) (1.6.3)\n",
- "Requirement already satisfied: absl-py>=0.4.0 in /opt/conda/lib/python3.7/site-packages (from tensorflow==2.8.4->-r service/requirements.txt (line 3)) (1.3.0)\n",
- "Requirement already satisfied: libclang>=9.0.1 in /opt/conda/lib/python3.7/site-packages (from tensorflow==2.8.4->-r service/requirements.txt (line 3)) (14.0.6)\n",
- "Requirement already satisfied: opt-einsum>=2.3.2 in /opt/conda/lib/python3.7/site-packages (from tensorflow==2.8.4->-r service/requirements.txt (line 3)) (3.3.0)\n",
- "Requirement already satisfied: six>=1.12.0 in /opt/conda/lib/python3.7/site-packages (from tensorflow==2.8.4->-r service/requirements.txt (line 3)) (1.16.0)\n",
- "Requirement already satisfied: protobuf<3.20,>=3.9.2 in /opt/conda/lib/python3.7/site-packages (from tensorflow==2.8.4->-r service/requirements.txt (line 3)) (3.19.6)\n",
- "Requirement already satisfied: wheel<1.0,>=0.23.0 in /opt/conda/lib/python3.7/site-packages (from astunparse>=1.6.0->tensorflow==2.8.4->-r service/requirements.txt (line 3)) (0.38.4)\n",
+ "Requirement already satisfied: click>=8.0 in /opt/conda/lib/python3.7/site-packages (from flask==2.2.2->-r service/requirements.txt (line 1)) (8.1.3)\n",
+ "Requirement already satisfied: itsdangerous>=2.0 in /opt/conda/lib/python3.7/site-packages (from flask==2.2.2->-r service/requirements.txt (line 1)) (2.1.2)\n",
+ "Requirement already satisfied: Jinja2>=3.0 in /opt/conda/lib/python3.7/site-packages (from flask==2.2.2->-r service/requirements.txt (line 1)) (3.1.2)\n",
+ "Requirement already satisfied: protobuf!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.19.5 in /opt/conda/lib/python3.7/site-packages (from google-cloud-aiplatform==1.18.3->-r service/requirements.txt (line 2)) (3.19.6)\n",
+ "Requirement already satisfied: google-cloud-storage<3.0.0dev,>=1.32.0 in /opt/conda/lib/python3.7/site-packages (from google-cloud-aiplatform==1.18.3->-r service/requirements.txt (line 2)) (2.7.0)\n",
+ "Requirement already satisfied: packaging<22.0.0dev,>=14.3 in /opt/conda/lib/python3.7/site-packages (from google-cloud-aiplatform==1.18.3->-r service/requirements.txt (line 2)) (21.3)\n",
+ "Requirement already satisfied: proto-plus<2.0.0dev,>=1.22.0 in /opt/conda/lib/python3.7/site-packages (from google-cloud-aiplatform==1.18.3->-r service/requirements.txt (line 2)) (1.22.1)\n",
+ "Requirement already satisfied: google-cloud-resource-manager<3.0.0dev,>=1.3.3 in /opt/conda/lib/python3.7/site-packages (from google-cloud-aiplatform==1.18.3->-r service/requirements.txt (line 2)) (1.8.0)\n",
+ "Requirement already satisfied: google-cloud-bigquery<3.0.0dev,>=1.15.0 in /opt/conda/lib/python3.7/site-packages (from google-cloud-aiplatform==1.18.3->-r service/requirements.txt (line 2)) (2.34.4)\n",
+ "Requirement already satisfied: google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0 in /opt/conda/lib/python3.7/site-packages (from google-cloud-aiplatform==1.18.3->-r service/requirements.txt (line 2)) (1.34.0)\n",
+ "Requirement already satisfied: setuptools>=3.0 in /opt/conda/lib/python3.7/site-packages (from gunicorn==20.1.0->-r service/requirements.txt (line 3)) (65.5.1)\n",
+ "Requirement already satisfied: libclang>=9.0.1 in /opt/conda/lib/python3.7/site-packages (from tensorflow==2.8.4->-r service/requirements.txt (line 4)) (14.0.6)\n",
+ "Requirement already satisfied: flatbuffers>=1.12 in /opt/conda/lib/python3.7/site-packages (from tensorflow==2.8.4->-r service/requirements.txt (line 4)) (22.12.6)\n",
+ "Requirement already satisfied: termcolor>=1.1.0 in /opt/conda/lib/python3.7/site-packages (from tensorflow==2.8.4->-r service/requirements.txt (line 4)) (2.1.1)\n",
+ "Requirement already satisfied: tensorflow-io-gcs-filesystem>=0.23.1 in /opt/conda/lib/python3.7/site-packages (from tensorflow==2.8.4->-r service/requirements.txt (line 4)) (0.28.0)\n",
+ "Requirement already satisfied: opt-einsum>=2.3.2 in /opt/conda/lib/python3.7/site-packages (from tensorflow==2.8.4->-r service/requirements.txt (line 4)) (3.3.0)\n",
+ "Requirement already satisfied: astunparse>=1.6.0 in /opt/conda/lib/python3.7/site-packages (from tensorflow==2.8.4->-r service/requirements.txt (line 4)) (1.6.3)\n",
+ "Requirement already satisfied: six>=1.12.0 in /opt/conda/lib/python3.7/site-packages (from tensorflow==2.8.4->-r service/requirements.txt (line 4)) (1.16.0)\n",
+ "Requirement already satisfied: typing-extensions>=3.6.6 in /opt/conda/lib/python3.7/site-packages (from tensorflow==2.8.4->-r service/requirements.txt (line 4)) (4.4.0)\n",
+ "Requirement already satisfied: tensorboard<2.9,>=2.8 in /opt/conda/lib/python3.7/site-packages (from tensorflow==2.8.4->-r service/requirements.txt (line 4)) (2.8.0)\n",
+ "Requirement already satisfied: absl-py>=0.4.0 in /opt/conda/lib/python3.7/site-packages (from tensorflow==2.8.4->-r service/requirements.txt (line 4)) (1.3.0)\n",
+ "Requirement already satisfied: grpcio<2.0,>=1.24.3 in /opt/conda/lib/python3.7/site-packages (from tensorflow==2.8.4->-r service/requirements.txt (line 4)) (1.51.1)\n",
+ "Requirement already satisfied: keras<2.9,>=2.8.0rc0 in /opt/conda/lib/python3.7/site-packages (from tensorflow==2.8.4->-r service/requirements.txt (line 4)) (2.8.0)\n",
+ "Requirement already satisfied: numpy>=1.20 in /opt/conda/lib/python3.7/site-packages (from tensorflow==2.8.4->-r service/requirements.txt (line 4)) (1.21.6)\n",
+ "Requirement already satisfied: google-pasta>=0.1.1 in /opt/conda/lib/python3.7/site-packages (from tensorflow==2.8.4->-r service/requirements.txt (line 4)) (0.2.0)\n",
+ "Requirement already satisfied: wrapt>=1.11.0 in /opt/conda/lib/python3.7/site-packages (from tensorflow==2.8.4->-r service/requirements.txt (line 4)) (1.14.1)\n",
+ "Requirement already satisfied: gast>=0.2.1 in /opt/conda/lib/python3.7/site-packages (from tensorflow==2.8.4->-r service/requirements.txt (line 4)) (0.5.3)\n",
+ "Requirement already satisfied: keras-preprocessing>=1.1.1 in /opt/conda/lib/python3.7/site-packages (from tensorflow==2.8.4->-r service/requirements.txt (line 4)) (1.1.2)\n",
+ "Requirement already satisfied: tensorflow-estimator<2.9,>=2.8 in /opt/conda/lib/python3.7/site-packages (from tensorflow==2.8.4->-r service/requirements.txt (line 4)) (2.8.0)\n",
+ "Requirement already satisfied: h5py>=2.9.0 in /opt/conda/lib/python3.7/site-packages (from tensorflow==2.8.4->-r service/requirements.txt (line 4)) (3.7.0)\n",
+ "Requirement already satisfied: wheel<1.0,>=0.23.0 in /opt/conda/lib/python3.7/site-packages (from astunparse>=1.6.0->tensorflow==2.8.4->-r service/requirements.txt (line 4)) (0.38.4)\n",
+ "Requirement already satisfied: requests<3.0.0dev,>=2.18.0 in /opt/conda/lib/python3.7/site-packages (from google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-cloud-aiplatform==1.18.3->-r service/requirements.txt (line 2)) (2.28.1)\n",
+ "Requirement already satisfied: googleapis-common-protos<2.0dev,>=1.56.2 in /opt/conda/lib/python3.7/site-packages (from google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-cloud-aiplatform==1.18.3->-r service/requirements.txt (line 2)) (1.57.0)\n",
+ "Requirement already satisfied: google-auth<3.0dev,>=1.25.0 in /opt/conda/lib/python3.7/site-packages (from google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-cloud-aiplatform==1.18.3->-r service/requirements.txt (line 2)) (2.15.0)\n",
+ "Requirement already satisfied: grpcio-status<2.0dev,>=1.33.2 in /opt/conda/lib/python3.7/site-packages (from google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-cloud-aiplatform==1.18.3->-r service/requirements.txt (line 2)) (1.48.2)\n",
+ "Requirement already satisfied: google-resumable-media<3.0dev,>=0.6.0 in /opt/conda/lib/python3.7/site-packages (from google-cloud-bigquery<3.0.0dev,>=1.15.0->google-cloud-aiplatform==1.18.3->-r service/requirements.txt (line 2)) (2.4.0)\n",
+ "Requirement already satisfied: python-dateutil<3.0dev,>=2.7.2 in /opt/conda/lib/python3.7/site-packages (from google-cloud-bigquery<3.0.0dev,>=1.15.0->google-cloud-aiplatform==1.18.3->-r service/requirements.txt (line 2)) (2.8.2)\n",
+ "Requirement already satisfied: google-cloud-core<3.0.0dev,>=1.4.1 in /opt/conda/lib/python3.7/site-packages (from google-cloud-bigquery<3.0.0dev,>=1.15.0->google-cloud-aiplatform==1.18.3->-r service/requirements.txt (line 2)) (2.3.2)\n",
+ "Requirement already satisfied: grpc-google-iam-v1<1.0.0dev,>=0.12.4 in /opt/conda/lib/python3.7/site-packages (from google-cloud-resource-manager<3.0.0dev,>=1.3.3->google-cloud-aiplatform==1.18.3->-r service/requirements.txt (line 2)) (0.12.4)\n",
"Requirement already satisfied: zipp>=0.5 in /opt/conda/lib/python3.7/site-packages (from importlib-metadata>=3.6.0->flask==2.2.2->-r service/requirements.txt (line 1)) (3.11.0)\n",
"Requirement already satisfied: MarkupSafe>=2.0 in /opt/conda/lib/python3.7/site-packages (from Jinja2>=3.0->flask==2.2.2->-r service/requirements.txt (line 1)) (2.1.1)\n",
- "Requirement already satisfied: google-auth-oauthlib<0.5,>=0.4.1 in /opt/conda/lib/python3.7/site-packages (from tensorboard<2.9,>=2.8->tensorflow==2.8.4->-r service/requirements.txt (line 3)) (0.4.6)\n",
- "Requirement already satisfied: google-auth<3,>=1.6.3 in /opt/conda/lib/python3.7/site-packages (from tensorboard<2.9,>=2.8->tensorflow==2.8.4->-r service/requirements.txt (line 3)) (2.15.0)\n",
- "Requirement already satisfied: tensorboard-plugin-wit>=1.6.0 in /opt/conda/lib/python3.7/site-packages (from tensorboard<2.9,>=2.8->tensorflow==2.8.4->-r service/requirements.txt (line 3)) (1.8.1)\n",
- "Requirement already satisfied: markdown>=2.6.8 in /opt/conda/lib/python3.7/site-packages (from tensorboard<2.9,>=2.8->tensorflow==2.8.4->-r service/requirements.txt (line 3)) (3.4.1)\n",
- "Requirement already satisfied: tensorboard-data-server<0.7.0,>=0.6.0 in /opt/conda/lib/python3.7/site-packages (from tensorboard<2.9,>=2.8->tensorflow==2.8.4->-r service/requirements.txt (line 3)) (0.6.1)\n",
- "Requirement already satisfied: requests<3,>=2.21.0 in /opt/conda/lib/python3.7/site-packages (from tensorboard<2.9,>=2.8->tensorflow==2.8.4->-r service/requirements.txt (line 3)) (2.28.1)\n",
- "Requirement already satisfied: rsa<5,>=3.1.4 in /opt/conda/lib/python3.7/site-packages (from google-auth<3,>=1.6.3->tensorboard<2.9,>=2.8->tensorflow==2.8.4->-r service/requirements.txt (line 3)) (4.9)\n",
- "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /opt/conda/lib/python3.7/site-packages (from google-auth<3,>=1.6.3->tensorboard<2.9,>=2.8->tensorflow==2.8.4->-r service/requirements.txt (line 3)) (5.2.0)\n",
- "Requirement already satisfied: pyasn1-modules>=0.2.1 in /opt/conda/lib/python3.7/site-packages (from google-auth<3,>=1.6.3->tensorboard<2.9,>=2.8->tensorflow==2.8.4->-r service/requirements.txt (line 3)) (0.2.8)\n",
- "Requirement already satisfied: requests-oauthlib>=0.7.0 in /opt/conda/lib/python3.7/site-packages (from google-auth-oauthlib<0.5,>=0.4.1->tensorboard<2.9,>=2.8->tensorflow==2.8.4->-r service/requirements.txt (line 3)) (1.3.1)\n",
- "Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.7/site-packages (from requests<3,>=2.21.0->tensorboard<2.9,>=2.8->tensorflow==2.8.4->-r service/requirements.txt (line 3)) (2022.12.7)\n",
- "Requirement already satisfied: charset-normalizer<3,>=2 in /opt/conda/lib/python3.7/site-packages (from requests<3,>=2.21.0->tensorboard<2.9,>=2.8->tensorflow==2.8.4->-r service/requirements.txt (line 3)) (2.1.1)\n",
- "Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.7/site-packages (from requests<3,>=2.21.0->tensorboard<2.9,>=2.8->tensorflow==2.8.4->-r service/requirements.txt (line 3)) (3.4)\n",
- "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /opt/conda/lib/python3.7/site-packages (from requests<3,>=2.21.0->tensorboard<2.9,>=2.8->tensorflow==2.8.4->-r service/requirements.txt (line 3)) (1.26.13)\n",
- "Requirement already satisfied: pyasn1<0.5.0,>=0.4.6 in /opt/conda/lib/python3.7/site-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard<2.9,>=2.8->tensorflow==2.8.4->-r service/requirements.txt (line 3)) (0.4.8)\n",
- "Requirement already satisfied: oauthlib>=3.0.0 in /opt/conda/lib/python3.7/site-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<0.5,>=0.4.1->tensorboard<2.9,>=2.8->tensorflow==2.8.4->-r service/requirements.txt (line 3)) (3.2.2)\n"
+ "Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /opt/conda/lib/python3.7/site-packages (from packaging<22.0.0dev,>=14.3->google-cloud-aiplatform==1.18.3->-r service/requirements.txt (line 2)) (3.0.9)\n",
+ "Requirement already satisfied: tensorboard-plugin-wit>=1.6.0 in /opt/conda/lib/python3.7/site-packages (from tensorboard<2.9,>=2.8->tensorflow==2.8.4->-r service/requirements.txt (line 4)) (1.8.1)\n",
+ "Requirement already satisfied: markdown>=2.6.8 in /opt/conda/lib/python3.7/site-packages (from tensorboard<2.9,>=2.8->tensorflow==2.8.4->-r service/requirements.txt (line 4)) (3.4.1)\n",
+ "Requirement already satisfied: google-auth-oauthlib<0.5,>=0.4.1 in /opt/conda/lib/python3.7/site-packages (from tensorboard<2.9,>=2.8->tensorflow==2.8.4->-r service/requirements.txt (line 4)) (0.4.6)\n",
+ "Requirement already satisfied: tensorboard-data-server<0.7.0,>=0.6.0 in /opt/conda/lib/python3.7/site-packages (from tensorboard<2.9,>=2.8->tensorflow==2.8.4->-r service/requirements.txt (line 4)) (0.6.1)\n",
+ "Requirement already satisfied: pyasn1-modules>=0.2.1 in /opt/conda/lib/python3.7/site-packages (from google-auth<3.0dev,>=1.25.0->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-cloud-aiplatform==1.18.3->-r service/requirements.txt (line 2)) (0.2.8)\n",
+ "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /opt/conda/lib/python3.7/site-packages (from google-auth<3.0dev,>=1.25.0->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-cloud-aiplatform==1.18.3->-r service/requirements.txt (line 2)) (5.2.0)\n",
+ "Requirement already satisfied: rsa<5,>=3.1.4 in /opt/conda/lib/python3.7/site-packages (from google-auth<3.0dev,>=1.25.0->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-cloud-aiplatform==1.18.3->-r service/requirements.txt (line 2)) (4.9)\n",
+ "Requirement already satisfied: requests-oauthlib>=0.7.0 in /opt/conda/lib/python3.7/site-packages (from google-auth-oauthlib<0.5,>=0.4.1->tensorboard<2.9,>=2.8->tensorflow==2.8.4->-r service/requirements.txt (line 4)) (1.3.1)\n",
+ "Requirement already satisfied: google-crc32c<2.0dev,>=1.0 in /opt/conda/lib/python3.7/site-packages (from google-resumable-media<3.0dev,>=0.6.0->google-cloud-bigquery<3.0.0dev,>=1.15.0->google-cloud-aiplatform==1.18.3->-r service/requirements.txt (line 2)) (1.5.0)\n",
+ "Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.7/site-packages (from requests<3.0.0dev,>=2.18.0->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-cloud-aiplatform==1.18.3->-r service/requirements.txt (line 2)) (2022.12.7)\n",
+ "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /opt/conda/lib/python3.7/site-packages (from requests<3.0.0dev,>=2.18.0->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-cloud-aiplatform==1.18.3->-r service/requirements.txt (line 2)) (1.26.13)\n",
+ "Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.7/site-packages (from requests<3.0.0dev,>=2.18.0->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-cloud-aiplatform==1.18.3->-r service/requirements.txt (line 2)) (3.4)\n",
+ "Requirement already satisfied: charset-normalizer<3,>=2 in /opt/conda/lib/python3.7/site-packages (from requests<3.0.0dev,>=2.18.0->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-cloud-aiplatform==1.18.3->-r service/requirements.txt (line 2)) (2.1.1)\n",
+ "Requirement already satisfied: pyasn1<0.5.0,>=0.4.6 in /opt/conda/lib/python3.7/site-packages (from pyasn1-modules>=0.2.1->google-auth<3.0dev,>=1.25.0->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-cloud-aiplatform==1.18.3->-r service/requirements.txt (line 2)) (0.4.8)\n",
+ "Requirement already satisfied: oauthlib>=3.0.0 in /opt/conda/lib/python3.7/site-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<0.5,>=0.4.1->tensorboard<2.9,>=2.8->tensorflow==2.8.4->-r service/requirements.txt (line 4)) (3.2.2)\n"
]
}
],
@@ -113,9 +185,16 @@
"!pip install -r service/requirements.txt"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### __Handle imports, utilities, and staging storage__"
+ ]
+ },
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 5,
"metadata": {
"id": "_XgTpm9ZxoN9",
"tags": []
@@ -135,134 +214,81 @@
"from sklearn.model_selection import train_test_split\n",
"from sklearn.preprocessing import LabelBinarizer\n",
"\n",
+ "from google.cloud import exceptions\n",
+ "\n",
+ "binarizer = LabelBinarizer()\n",
+ "\n",
"tf.get_logger().setLevel('ERROR')\n",
"rcParams['figure.figsize'] = 12, 8\n",
"warnings.filterwarnings(\"ignore\")"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### __Create Cloud Storage bucket__"
+ ]
+ },
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
- "df = pd.read_csv(DATA_FILE)\n",
+ "from google.cloud import storage\n",
"\n",
- "# Split off a testing datasets\n",
- "train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)\n",
+ "storage_client = storage.Client()\n",
"\n",
- "# Split the remaining data into training and validation datasets\n",
- "train_df, valid_df = train_test_split(train_df, test_size=0.1, random_state=42)"
+ "try:\n",
+ " bucket = storage_client.bucket(GCP_BUCKET)\n",
+ " bucket.storage_class = \"STANDARD\"\n",
+ " new_bucket = storage_client.create_bucket(bucket, location=GCP_REGION)\n",
+ "except exceptions.Conflict:\n",
+ " print(\"Bucket already exists - choose a new name if the bucket is not under your control\")\n",
+ " pass"
]
},
{
- "cell_type": "code",
- "execution_count": 5,
+ "cell_type": "markdown",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " text | \n",
- " intent | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 507 | \n",
- " Delete scheduled task | \n",
- " destroy cron | \n",
- "
\n",
- " \n",
- " 89 | \n",
- " Can you delete a cron job? | \n",
- " destroy cron | \n",
- "
\n",
- " \n",
- " 199 | \n",
- " Documentation, where can I find? Can you guide... | \n",
- " documentation | \n",
- "
\n",
- " \n",
- " 193 | \n",
- " Can you please reduce scale, I have too many r... | \n",
- " self scale | \n",
- "
\n",
- " \n",
- " 468 | \n",
- " Can you assist me with scheduling tasks for my... | \n",
- " create cron | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " text intent\n",
- "507 Delete scheduled task destroy cron\n",
- "89 Can you delete a cron job? destroy cron\n",
- "199 Documentation, where can I find? Can you guide... documentation\n",
- "193 Can you please reduce scale, I have too many r... self scale\n",
- "468 Can you assist me with scheduling tasks for my... create cron"
- ]
- },
- "execution_count": 5,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
"source": [
- "train_df.head()"
+ "## Custom model"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### __Data preparation__"
]
},
{
"cell_type": "code",
- "execution_count": 6,
- "metadata": {
- "scrolled": true
- },
+ "execution_count": 7,
+ "metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "(367, 2)"
+ "((347, 2), (82, 2), (82, 2))"
]
},
- "execution_count": 6,
+ "execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "train_df.shape"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "metadata": {},
- "outputs": [],
- "source": [
- "train_features = train_df.copy()\n",
- "train_labels = train_features.pop(\"intent\")"
+ "df = pd.read_csv(DATA_FILE)\n",
+ "\n",
+ "# Split off a testing dataset\n",
+ "train_df, test_df = train_test_split(df, test_size=0.16, random_state=42)\n",
+ "\n",
+ "# Split the remaining data into training and validation datasets\n",
+ "train_df, valid_df = train_test_split(train_df, test_size=0.19, random_state=42)\n",
+ "\n",
+ "train_df.shape, test_df.shape, valid_df.shape"
]
},
{
@@ -271,7 +297,12 @@
"metadata": {},
"outputs": [],
"source": [
- "train_features = train_features.values"
+ "# Separate out features and labels (training)\n",
+ "train_copy = train_df.copy()\n",
+ "train_labels = train_copy.pop(\"intent\")\n",
+ "train_labels = binarizer.fit_transform(train_labels.values)\n",
+ "train_features = train_copy.values\n",
+ "intent_count = train_labels.shape[1]"
]
},
{
@@ -280,47 +311,24 @@
"metadata": {},
"outputs": [],
"source": [
- "binarizer = LabelBinarizer()\n",
- "train_labels = binarizer.fit_transform(train_labels.values)"
+ "# Separate out features and labels (testing)\n",
+ "test_copy = test_df.copy()\n",
+ "test_labels = test_copy.pop(\"intent\")\n",
+ "test_labels = binarizer.transform(test_labels.values)\n",
+ "test_features = test_copy.values"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "(367, 8)"
- ]
- },
- "execution_count": 10,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "intent_count = train_labels.shape[1]\n",
- "train_labels.shape"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 11,
- "metadata": {},
"outputs": [],
"source": [
- "test_features = test_df.copy()\n",
- "test_labels = test_features.pop(\"intent\")\n",
- "valid_features = valid_df.copy()\n",
- "valid_labels = valid_features.pop(\"intent\")\n",
- "\n",
- "test_features = test_features.values\n",
- "valid_features = valid_features.values\n",
- "\n",
- "test_labels = binarizer.transform(test_labels.values)\n",
- "valid_labels = binarizer.transform(valid_labels.values)"
+ "# Separate out features and labels (validation)\n",
+ "valid_copy = valid_df.copy()\n",
+ "valid_labels = valid_copy.pop(\"intent\")\n",
+ "valid_labels = binarizer.transform(valid_labels.values)\n",
+ "valid_features = valid_copy.values"
]
},
{
@@ -329,7 +337,7 @@
"id": "dX8FtlpGJRE6"
},
"source": [
- "## Loading models from TensorFlow Hub\n",
+ "#### __Load model from TensorFlow Hub__\n",
"\n",
"Here you can choose which BERT model you will load from TensorFlow Hub and fine-tune. There are multiple BERT models available.\n",
"\n",
@@ -350,7 +358,7 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": 11,
"metadata": {
"id": "y8_ctG55-uTX",
"tags": []
@@ -366,7 +374,6 @@
}
],
"source": [
- "bert_model_name = 'small_bert/bert_en_uncased_L-8_H-512_A-8' \n",
"map_name_to_handle = {\n",
" 'bert_en_uncased_L-12_H-768_A-12':\n",
" 'https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/3',\n",
@@ -505,8 +512,8 @@
" 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/2',\n",
"}\n",
"\n",
- "tfhub_handle_encoder = map_name_to_handle[bert_model_name]\n",
- "tfhub_handle_preprocess = map_model_to_preprocess[bert_model_name]\n",
+ "tfhub_handle_encoder = map_name_to_handle[BERT_MODEL_NAME]\n",
+ "tfhub_handle_preprocess = map_model_to_preprocess[BERT_MODEL_NAME]\n",
"\n",
"print(f'BERT model selected : {tfhub_handle_encoder}')\n",
"print(f'Preprocess model auto-selected: {tfhub_handle_preprocess}')"
@@ -518,7 +525,7 @@
"id": "7WrcxxTRDdHi"
},
"source": [
- "## The preprocessing model\n",
+ "#### __The preprocessing model__\n",
"\n",
"Text inputs need to be transformed to numeric token ids and arranged in several Tensors before being input to BERT. TensorFlow Hub provides a matching preprocessing model for each of the BERT models discussed above, which implements this transformation using TF ops from the TF.text library. It is not necessary to run pure Python code outside your TensorFlow model to preprocess text.\n",
"\n",
@@ -529,7 +536,7 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": 12,
"metadata": {
"id": "0SQi-jWd_jzq",
"tags": []
@@ -539,10 +546,10 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "2023-01-15 19:09:42.975269: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda/lib64:/usr/local/nccl2/lib:/usr/local/cuda/extras/CUPTI/lib64\n",
- "2023-01-15 19:09:42.975329: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)\n",
- "2023-01-15 19:09:42.975352: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (tensorflow-2-8-20230115-133740): /proc/driver/nvidia/version does not exist\n",
- "2023-01-15 19:09:42.975597: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n",
+ "2023-01-17 22:50:26.002258: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda/lib64:/usr/local/nccl2/lib:/usr/local/cuda/extras/CUPTI/lib64\n",
+ "2023-01-17 22:50:26.002306: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)\n",
+ "2023-01-17 22:50:26.002335: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (tensorflow-2-8-20230117-000657): /proc/driver/nvidia/version does not exist\n",
+ "2023-01-17 22:50:26.002591: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n",
"To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n"
]
}
@@ -562,16 +569,17 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "array(['Delete scheduled task'], dtype=object)"
+ "array(['Can you tell me how to change the scale of one of your microservices?'],\n",
+ " dtype=object)"
]
},
- "execution_count": 14,
+ "execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
@@ -582,7 +590,7 @@
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": 14,
"metadata": {
"id": "r9-zCzJpnuwS",
"tags": []
@@ -592,10 +600,10 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "Keys : ['input_type_ids', 'input_word_ids', 'input_mask']\n",
+ "Keys : ['input_mask', 'input_type_ids', 'input_word_ids']\n",
"Shape : (1, 128)\n",
- "Word Ids : [ 101 3972 12870 5115 4708 102 0 0 0 0 0 0]\n",
- "Input Mask : [1 1 1 1 1 1 0 0 0 0 0 0]\n",
+ "Word Ids : [ 101 2064 2017 2425 2033 2129 2000 2689 1996 4094 1997 2028]\n",
+ "Input Mask : [1 1 1 1 1 1 1 1 1 1 1 1]\n",
"Type Ids : [0 0 0 0 0 0 0 0 0 0 0 0]\n"
]
}
@@ -632,14 +640,14 @@
"id": "DKnLPSEmtp9i"
},
"source": [
- "## Using the BERT model\n",
+ "#### __Using the BERT model__\n",
"\n",
"Before putting BERT into your own model, let's take a look at its outputs. You will load it from TF Hub and see the returned values."
]
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": 15,
"metadata": {
"id": "tXxYpK8ixL34",
"tags": []
@@ -651,7 +659,7 @@
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": 16,
"metadata": {
"id": "_OoF9mebuSZc",
"tags": []
@@ -661,35 +669,35 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "Loaded BERT: https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-8_H-512_A-8/1\n",
- "Pooled Outputs Shape:(1, 512)\n",
- "Pooled Outputs Values:[ 0.38008717 -0.3713979 -0.99633324 -0.8828487 -0.4356928 -0.4016127\n",
- " -0.99928737 0.32494175 0.24131705 0.39659867 0.9951671 -0.11896375]\n",
- "Sequence Outputs Shape:(1, 128, 512)\n",
- "Sequence Outputs Values:[[-0.7442301 0.17604867 0.63783437 ... 0.513188 -1.2219555\n",
- " -0.00173689]\n",
- " [-0.54973364 0.26147014 -0.7926538 ... 0.24370176 -0.60001326\n",
- " 0.24907288]\n",
- " [-0.09852025 -0.24003302 -0.42945752 ... 0.29231924 -1.28544\n",
- " -0.24107009]\n",
+ "Loaded BERT : https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-8_H-512_A-8/1\n",
+ "Pooled Outputs Shape : (1, 512)\n",
+ "Pooled Outputs Values : [ 0.385889 -0.12248868 -0.9998633 0.28463823 -0.4075239 -0.39477754\n",
+ " -0.99803925 0.2665583 0.24860786 0.08505904 -0.34656683 0.1943462 ]\n",
+ "Sequence Outputs Shape : (1, 128, 512)\n",
+ "Sequence Outputs Values : [[-0.23372588 0.3678839 0.94593 ... -0.12252477 -0.43001646\n",
+ " 0.6094305 ]\n",
+ " [-0.39601198 -0.09191871 0.30907482 ... -0.6917538 1.4148049\n",
+ " -0.2261968 ]\n",
+ " [-1.4655601 -0.6329228 1.4872218 ... -0.44263726 -0.7672484\n",
+ " -0.14803627]\n",
" ...\n",
- " [ 0.10415229 -0.1520155 -0.23385802 ... 0.17001778 -0.54459953\n",
- " -0.22196133]\n",
- " [ 0.17499137 -0.06097127 0.54251975 ... 0.5958168 -0.37109143\n",
- " -0.10074075]\n",
- " [ 0.15013008 0.03962955 0.59219164 ... 0.3902613 -0.3109738\n",
- " 0.33535522]]\n"
+ " [-0.59902114 0.02120084 0.6990685 ... 0.9867604 -0.20310818\n",
+ " -0.4146583 ]\n",
+ " [-0.16003767 0.2527567 1.5589244 ... 0.30391887 1.2162194\n",
+ " -0.24888408]\n",
+ " [ 0.30644155 0.27735925 0.70691913 ... 0.53195554 0.14866629\n",
+ " 0.9018017 ]]\n"
]
}
],
"source": [
"bert_results = bert_model(text_preprocessed)\n",
"\n",
- "print(f'Loaded BERT: {tfhub_handle_encoder}')\n",
- "print(f'Pooled Outputs Shape:{bert_results[\"pooled_output\"].shape}')\n",
- "print(f'Pooled Outputs Values:{bert_results[\"pooled_output\"][0, :12]}')\n",
- "print(f'Sequence Outputs Shape:{bert_results[\"sequence_output\"].shape}')\n",
- "print(f'Sequence Outputs Values:{bert_results[\"sequence_output\"][0, :12]}')"
+ "print(f'Loaded BERT : {tfhub_handle_encoder}')\n",
+ "print(f'Pooled Outputs Shape : {bert_results[\"pooled_output\"].shape}')\n",
+ "print(f'Pooled Outputs Values : {bert_results[\"pooled_output\"][0, :12]}')\n",
+ "print(f'Sequence Outputs Shape : {bert_results[\"sequence_output\"].shape}')\n",
+ "print(f'Sequence Outputs Values : {bert_results[\"sequence_output\"][0, :12]}')"
]
},
{
@@ -713,7 +721,7 @@
"id": "pDNKfAXbDnJH"
},
"source": [
- "## Define your model\n",
+ "#### __Define the model__\n",
"\n",
"You will create a very simple fine-tuned model, with the preprocessing model, the selected BERT model, one Dense and a Dropout layer.\n",
"\n",
@@ -722,7 +730,7 @@
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": 17,
"metadata": {
"id": "aksj743St9ga",
"tags": []
@@ -752,7 +760,7 @@
},
{
"cell_type": "code",
- "execution_count": 19,
+ "execution_count": 18,
"metadata": {
"id": "mGMF8AZcB2Zy",
"tags": []
@@ -763,8 +771,8 @@
"output_type": "stream",
"text": [
"tf.Tensor(\n",
- "[[0.10404271 0.09260793 0.08020899 0.09814781 0.15087037 0.03379072\n",
- " 0.16705798 0.27327356]], shape=(1, 8), dtype=float32)\n"
+ "[[0.16331084 0.22407842 0.26904288 0.04798554 0.12082581 0.0503904\n",
+ " 0.09420813 0.03015799]], shape=(1, 8), dtype=float32)\n"
]
}
],
@@ -787,7 +795,7 @@
},
{
"cell_type": "code",
- "execution_count": 20,
+ "execution_count": 19,
"metadata": {},
"outputs": [
{
@@ -800,28 +808,28 @@
"==================================================================================================\n",
" text (InputLayer) [(None,)] 0 [] \n",
" \n",
- " preprocessing (KerasLayer) {'input_type_ids': 0 ['text[0][0]'] \n",
- " (None, 128), \n",
- " 'input_mask': (Non \n",
+ " preprocessing (KerasLayer) {'input_mask': (Non 0 ['text[0][0]'] \n",
" e, 128), \n",
" 'input_word_ids': \n",
+ " (None, 128), \n",
+ " 'input_type_ids': \n",
" (None, 128)} \n",
" \n",
- " BERT_encoder (KerasLayer) {'sequence_output': 41373185 ['preprocessing[0][0]', \n",
- " (None, 128, 512), 'preprocessing[0][1]', \n",
- " 'default': (None, 'preprocessing[0][2]'] \n",
- " 512), \n",
- " 'pooled_output': ( \n",
- " None, 512), \n",
- " 'encoder_outputs': \n",
- " [(None, 128, 512), \n",
+ " BERT_encoder (KerasLayer) {'encoder_outputs': 41373185 ['preprocessing[0][0]', \n",
+ " [(None, 128, 512), 'preprocessing[0][1]', \n",
+ " (None, 128, 512), 'preprocessing[0][2]'] \n",
" (None, 128, 512), \n",
" (None, 128, 512), \n",
" (None, 128, 512), \n",
" (None, 128, 512), \n",
" (None, 128, 512), \n",
+ " (None, 128, 512)], \n",
+ " 'sequence_output': \n",
" (None, 128, 512), \n",
- " (None, 128, 512)]} \n",
+ " 'default': (None, \n",
+ " 512), \n",
+ " 'pooled_output': ( \n",
+ " None, 512)} \n",
" \n",
" dropout (Dropout) (None, 512) 0 ['BERT_encoder[0][9]'] \n",
" \n",
@@ -845,9 +853,9 @@
"id": "WbUWoZMwc302"
},
"source": [
- "## Model training\n",
+ "#### __Model training__\n",
"\n",
- "You now have all the pieces to train a model, including the preprocessing module, BERT encoder, data, and classifier."
+ "You now have all the pieces to train a model, including the preprocessing module, BERT encoder, data, and classifier. Vertex AI Training can be used for parallelization, accelerated training, and hyperparameter tuning; however, the \"baseline case\" of in-notebook training is provided below."
]
},
{
@@ -856,12 +864,12 @@
"id": "WpJ3xcwDT56v"
},
"source": [
- "Since this is a non-binary classification problem and the model outputs probabilities, you'll use `losses.CategoricalCrossentropy` loss function.\n"
+ "Since this is a non-binary classification problem and the model outputs probabilities, you'll use the `losses.CategoricalCrossentropy` loss function."
]
},
{
"cell_type": "code",
- "execution_count": 21,
+ "execution_count": 20,
"metadata": {
"id": "OWPOZE-L3AgE",
"tags": []
@@ -878,22 +886,21 @@
"id": "SqlarlpC_v0g"
},
"source": [
- "### Loading the BERT model and training\n",
+ "#### __Loading the BERT model and training__\n",
"\n",
"Using the `classifier_model` you created earlier, you can compile the model with the loss, metric and optimizer."
]
},
{
"cell_type": "code",
- "execution_count": 22,
+ "execution_count": 21,
"metadata": {
"id": "-7GPDhR98jsD",
"tags": []
},
"outputs": [],
"source": [
- "epochs=5\n",
- "optimizer=tf.keras.optimizers.Adam(1e-5)\n",
+ "optimizer=tf.keras.optimizers.Adam(LEARNING_RATE)\n",
"classifier_model.compile(\n",
" optimizer=optimizer,\n",
" loss=loss,\n",
@@ -907,12 +914,12 @@
"id": "CpBuV5j2cS_b"
},
"source": [
- "Note: training time will vary depending on the complexity of the BERT model you have selected."
+ "Note: training time will vary depending on the complexity of the BERT model you have selected, the notebook environment compute/memory, and the availability of TPU/GPU accelerators. Again, Vertex AI Training could be used to scale and distribute training."
]
},
{
"cell_type": "code",
- "execution_count": 23,
+ "execution_count": 22,
"metadata": {
"id": "HtfDFAnN_Neu",
"scrolled": true,
@@ -923,36 +930,34 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "Training model with https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-8_H-512_A-8/1\n",
- "(367, 1)\n",
- "(367, 8)\n",
- "(41, 1)\n",
- "(41, 8)\n",
- "Epoch 1/5\n",
- "12/12 [==============================] - 95s 7s/step - loss: 2.1091 - categorical_accuracy: 0.1880 - val_loss: 1.7822 - val_categorical_accuracy: 0.3415\n",
- "Epoch 2/5\n",
- "12/12 [==============================] - 83s 7s/step - loss: 1.7212 - categorical_accuracy: 0.3951 - val_loss: 1.5522 - val_categorical_accuracy: 0.5854\n",
- "Epoch 3/5\n",
- "12/12 [==============================] - 82s 7s/step - loss: 1.4229 - categorical_accuracy: 0.6431 - val_loss: 1.3455 - val_categorical_accuracy: 0.6585\n",
- "Epoch 4/5\n",
- "12/12 [==============================] - 82s 7s/step - loss: 1.1552 - categorical_accuracy: 0.7575 - val_loss: 1.0439 - val_categorical_accuracy: 0.7805\n",
- "Epoch 5/5\n",
- "12/12 [==============================] - 83s 7s/step - loss: 0.8882 - categorical_accuracy: 0.8692 - val_loss: 0.7925 - val_categorical_accuracy: 0.8780\n"
+ "Training the transfer-learning model based on https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-8_H-512_A-8/1\n",
+ "Epoch 1/8\n",
+ "11/11 [==============================] - 96s 8s/step - loss: 2.1831 - categorical_accuracy: 0.1354 - val_loss: 1.9841 - val_categorical_accuracy: 0.2805\n",
+ "Epoch 2/8\n",
+ "11/11 [==============================] - 81s 7s/step - loss: 1.8310 - categorical_accuracy: 0.3285 - val_loss: 1.7547 - val_categorical_accuracy: 0.4634\n",
+ "Epoch 3/8\n",
+ "11/11 [==============================] - 81s 7s/step - loss: 1.5643 - categorical_accuracy: 0.5533 - val_loss: 1.4916 - val_categorical_accuracy: 0.6463\n",
+ "Epoch 4/8\n",
+ "11/11 [==============================] - 81s 7s/step - loss: 1.2815 - categorical_accuracy: 0.7349 - val_loss: 1.1838 - val_categorical_accuracy: 0.7805\n",
+ "Epoch 5/8\n",
+ "11/11 [==============================] - 80s 7s/step - loss: 1.0152 - categorical_accuracy: 0.8444 - val_loss: 0.9169 - val_categorical_accuracy: 0.8537\n",
+ "Epoch 6/8\n",
+ "11/11 [==============================] - 81s 7s/step - loss: 0.7800 - categorical_accuracy: 0.9193 - val_loss: 0.7026 - val_categorical_accuracy: 0.8902\n",
+ "Epoch 7/8\n",
+ "11/11 [==============================] - 80s 7s/step - loss: 0.5785 - categorical_accuracy: 0.9280 - val_loss: 0.5540 - val_categorical_accuracy: 0.8780\n",
+ "Epoch 8/8\n",
+ "11/11 [==============================] - 80s 7s/step - loss: 0.4360 - categorical_accuracy: 0.9597 - val_loss: 0.4612 - val_categorical_accuracy: 0.8902\n"
]
}
],
"source": [
- "print(f'Training model with {tfhub_handle_encoder}')\n",
- "print(train_features.shape)\n",
- "print(train_labels.shape)\n",
- "print(valid_features.shape)\n",
- "print(valid_labels.shape)\n",
+ "print(f'Training the transfer-learning model based on {tfhub_handle_encoder}')\n",
"history = classifier_model.fit(\n",
" x=train_features,\n",
" y=train_labels,\n",
" validation_data=(valid_features,valid_labels),\n",
- " batch_size=32,\n",
- " epochs=epochs\n",
+ " batch_size=BATCH_SIZE,\n",
+ " epochs=EPOCHS\n",
")"
]
},
@@ -962,14 +967,14 @@
"id": "uBthMlTSV8kn"
},
"source": [
- "### Evaluate the model\n",
+ "#### __Evaluate the model__\n",
"\n",
"Let's see how the model performs. Two values will be returned: loss (a number which represents the error; lower values are better) and accuracy."
]
},
{
"cell_type": "code",
- "execution_count": 24,
+ "execution_count": 23,
"metadata": {
"id": "slqB-urBV9sP",
"tags": []
@@ -979,9 +984,9 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "4/4 [==============================] - 7s 2s/step - loss: 0.6393 - categorical_accuracy: 0.9029\n",
- "Loss: 0.6392922401428223\n",
- "Accuracy: 0.9029126167297363\n"
+ "3/3 [==============================] - 5s 2s/step - loss: 0.3591 - categorical_accuracy: 0.9146\n",
+ "Loss: 0.35913094878196716\n",
+ "Accuracy: 0.9146341681480408\n"
]
}
],
@@ -998,39 +1003,32 @@
"id": "uttWpgmSfzq9"
},
"source": [
- "### Plot the accuracy and loss over time\n",
+ "#### __Plot the accuracy and loss over time__\n",
"\n",
"Based on the `History` object returned by `model.fit()`, you can plot the training and validation loss for comparison, as well as the training and validation accuracy:"
]
},
{
"cell_type": "code",
- "execution_count": 25,
+ "execution_count": 24,
"metadata": {
"id": "fiythcODf0xo",
"tags": []
},
"outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "dict_keys(['loss', 'categorical_accuracy', 'val_loss', 'val_categorical_accuracy'])\n"
- ]
- },
{
"data": {
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 25,
+ "execution_count": 24,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
- "image/png": "\n",
+ "image/png": "\n",
"text/plain": [
""
]
@@ -1041,7 +1039,6 @@
],
"source": [
"history_dict = history.history\n",
- "print(history_dict.keys())\n",
"\n",
"acc = history_dict['categorical_accuracy']\n",
"val_acc = history_dict['val_categorical_accuracy']\n",
@@ -1093,12 +1090,25 @@
},
{
"cell_type": "code",
- "execution_count": 26,
+ "execution_count": 25,
"metadata": {
"id": "VBWzH6exlCPS",
"tags": []
},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "input: Hey can I please see the documentation : estimated intent: documentation\n",
+ "input: Could use some help with a cron job : estimated intent: destroy cron\n",
+ "input: How do I deprovision you on the weekends : estimated intent: destroy cron\n",
+ "input: I have a big issue with that response. : estimated intent: report issue\n",
+ "input: How can I scale your microservices to be more cost-effective? : estimated intent: self scale\n",
+ "\n"
+ ]
+ }
+ ],
"source": [
"def print_my_examples(inputs, results):\n",
" result_for_printing = \\\n",
@@ -1107,68 +1117,1940 @@
" print(*result_for_printing, sep='\\n')\n",
" print()\n",
"\n",
- "\n",
"examples = [\n",
" 'Hey can I please see the documentation',\n",
" 'Could use some help with a cron job',\n",
" 'How do I deprovision you on the weekends',\n",
" 'I have a big issue with that response.',\n",
- " 'How can I scale your microservices to be more cost-effective?',\n",
- " 'I need to run some unit tests'\n",
+ " 'How can I scale your microservices to be more cost-effective?'\n",
"]\n",
"\n",
- "results = tf.nn.softmax(classifier_model(tf.constant(examples)))"
+ "results = tf.nn.softmax(classifier_model(tf.constant(examples)))\n",
+ "intents = binarizer.inverse_transform(results.numpy())\n",
+ "print_my_examples(examples, intents)"
]
},
{
- "cell_type": "code",
- "execution_count": 27,
+ "cell_type": "markdown",
"metadata": {},
- "outputs": [],
"source": [
- "intents=binarizer.inverse_transform(results.numpy())"
+ "#### __Save the model__"
]
},
{
"cell_type": "code",
- "execution_count": 28,
- "metadata": {
- "id": "VBWzH6exlCPS",
- "tags": []
- },
+ "execution_count": 26,
+ "metadata": {},
"outputs": [
{
- "name": "stdout",
+ "name": "stderr",
"output_type": "stream",
"text": [
- "input: Hey can I please see the documentation : estimated intent: documentation\n",
- "input: Could use some help with a cron job : estimated intent: destroy cron\n",
- "input: How do I deprovision you on the weekends : estimated intent: documentation\n",
- "input: I have a big issue with that response. : estimated intent: report issue\n",
- "input: How can I scale your microservices to be more cost-effective? : estimated intent: self scale\n",
- "input: I need to run some unit tests : estimated intent: report issue\n",
- "\n"
+ "2023-01-17 23:02:42.986410: W tensorflow/python/util/util.cc:368] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.\n",
+ "WARNING:absl:Found untraced functions such as restored_function_body, restored_function_body, restored_function_body, restored_function_body, restored_function_body while saving (showing 5 of 244). These functions will not be directly callable after loading.\n"
]
}
],
"source": [
- "print_my_examples(examples, intents)"
+ "# Write the trained classifier to the local service/model directory\n",
+ "# (the Vertex AI upload step further down reads its files from service/model/)\n",
+ "classifier_model.save('service/model')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We have taken a \"DIY\" and notebook-centric approach to data preparation, model definition, and training. Continuing this approach would involve engineering \"taking over\" at this point, for versioning and productionalization. However, we can take a more platformized approach with incremental additions of Vertex AI (initially, just for metadata, then for more)."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "### Save the model"
+ "## Adding the custom model to Vertex AI"
]
},
{
"cell_type": "code",
- "execution_count": 29,
+ "execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
- "classifier_model.save('service/intents.keras')"
+ "from google.cloud import aiplatform\n",
+ "from google.cloud import storage\n",
+ "\n",
+ "# Vertex AI initialization\n",
+ "# Sets project, region and experiment once; the start_run()/log_*() calls in the\n",
+ "# following cells pass none of these themselves and rely on this initialization.\n",
+ "aiplatform.init(\n",
+ " project=GCP_PROJECT,\n",
+ " location=GCP_REGION,\n",
+ " experiment=EXPERIMENT_NAME,\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### __Create an experiment run for the custom model__"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Associating projects/12345678910/locations/us-central1/metadataStores/default/contexts/isidro-intents-compare-custom-automl-bqml-run-custom-r1 to Experiment: isidro-intents-compare-custom-automl-bqml\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.metadata.experiment_resources:Associating projects/12345678910/locations/us-central1/metadataStores/default/contexts/isidro-intents-compare-custom-automl-bqml-run-custom-r1 to Experiment: isidro-intents-compare-custom-automl-bqml\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 28,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Start an experiment run (used to compare against other models like BQML and AutoML)\n",
+ "# The run name is suffixed with VERTEX_MODEL_ROUND; change that setting when re-running\n",
+ "# the notebook if a distinct run is wanted.\n",
+ "aiplatform.start_run(f\"run-custom-{VERTEX_MODEL_ROUND}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Add experiment run parameters\n",
+ "# Every value is read from the same setting that drove training (EPOCHS for fit(),\n",
+ "# LEARNING_RATE for the Adam optimizer), so the logged run cannot drift from the real one.\n",
+ "metaparams = {}\n",
+ "metaparams[\"epochs\"] = EPOCHS\n",
+ "metaparams[\"learning rate\"] = LEARNING_RATE\n",
+ "metaparams[\"base model\"] = BERT_MODEL_NAME\n",
+ "aiplatform.log_params(metaparams)\n",
+ "\n",
+ "# Add experiment run metrics\n",
+ "# [-1] picks the validation-set value recorded for the final training epoch\n",
+ "metrics = {}\n",
+ "metrics[\"cross-entropy loss\"] = history_dict[\"val_loss\"][-1]\n",
+ "metrics[\"categorical accuracy\"] = history_dict[\"val_categorical_accuracy\"][-1]\n",
+ "aiplatform.log_metrics(metrics)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sklearn.metrics import confusion_matrix\n",
+ "\n",
+ "# Use test dataset to compile a confusion matrix\n",
+ "# (copy first so that pop() leaves test_df itself intact)\n",
+ "test_copy = test_df.copy()\n",
+ "test_labels = test_copy.pop(\"intent\")\n",
+ "test_features = test_copy.values\n",
+ "test_results = tf.nn.softmax(classifier_model(tf.constant(test_features)))\n",
+ "# Map each row of scores back to its intent name\n",
+ "test_results = binarizer.inverse_transform(test_results.numpy())\n",
+ "\n",
+ "# Take the label order from the fitted binarizer instead of set(test_labels):\n",
+ "# - set ordering of strings can differ between kernel sessions, which would shuffle\n",
+ "#   the matrix rows/columns from run to run;\n",
+ "# - a class that is predicted but missing from the test split would otherwise be\n",
+ "#   silently left out of the matrix.\n",
+ "labels = binarizer.classes_.tolist()\n",
+ "y_true = list(test_labels)\n",
+ "y_pred = list(test_results)\n",
+ "\n",
+ "classification_metrics = {\n",
+ "    \"matrix\": confusion_matrix(y_true, y_pred, labels=labels).tolist(),\n",
+ "    \"labels\": labels,\n",
+ "}\n",
+ "\n",
+ "# Add the confusion matrix to the experiment run\n",
+ "aiplatform.log_classification_metrics(\n",
+ "    labels=classification_metrics[\"labels\"],\n",
+ "    matrix=classification_metrics[\"matrix\"],\n",
+ "    display_name=\"intents confusion matrix\",\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Creating Model\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.models:Creating Model\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Create Model backing LRO: projects/12345678910/locations/us-central1/models/1240990186968449024/operations/8960009109030043648\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.models:Create Model backing LRO: projects/12345678910/locations/us-central1/models/1240990186968449024/operations/8960009109030043648\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Model created. Resource name: projects/12345678910/locations/us-central1/models/1240990186968449024@1\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.models:Model created. Resource name: projects/12345678910/locations/us-central1/models/1240990186968449024@1\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "To use this Model in another session:\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.models:To use this Model in another session:\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "model = aiplatform.Model('projects/12345678910/locations/us-central1/models/1240990186968449024@1')\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.models:model = aiplatform.Model('projects/12345678910/locations/us-central1/models/1240990186968449024@1')\n"
+ ]
+ }
+ ],
+ "source": [
+ "from pathlib import Path\n",
+ "\n",
+ "# Tie the dataset to the model training and experiment run\n",
+ "storage_client = storage.Client()\n",
+ "bucket = storage_client.bucket(GCP_BUCKET)\n",
+ "gcs_prefix = f\"custom-{VERTEX_MODEL_ROUND}\"\n",
+ "blob = bucket.blob(f\"{gcs_prefix}/{DATA_FILE}\")\n",
+ "blob.upload_from_filename(DATA_FILE) # raw data\n",
+ "\n",
+ "training_data_artifact = aiplatform.Artifact.create(\n",
+ "    schema_title='system.Dataset',\n",
+ "    uri=f'gs://{GCP_BUCKET}/{gcs_prefix}/{DATA_FILE}',\n",
+ "    display_name='labeled intents data'\n",
+ ")\n",
+ "\n",
+ "# Local directory written by classifier_model.save() earlier in the notebook\n",
+ "model_dir = Path(\"service/model\")\n",
+ "if not (model_dir / \"saved_model.pb\").is_file():\n",
+ "    # Fail here with a clear message instead of registering an empty/incomplete artifact\n",
+ "    raise FileNotFoundError(f\"No SavedModel found in {model_dir} - run the save step first\")\n",
+ "\n",
+ "with aiplatform.start_execution(\n",
+ "    schema_title=\"system.ContainerExecution\",\n",
+ "    display_name='training'\n",
+ ") as execution:\n",
+ "    execution.assign_input_artifacts([training_data_artifact])\n",
+ "\n",
+ "    # Upload the model to GCS (the client and bucket from above are reused).\n",
+ "    # Every file under the export directory is uploaded with its relative path, so\n",
+ "    # sharded variable files or additional assets are not missed, as they would be\n",
+ "    # with a hand-maintained list of file names.\n",
+ "    for model_file in sorted(path for path in model_dir.rglob(\"*\") if path.is_file()):\n",
+ "        relative_name = model_file.relative_to(model_dir).as_posix()\n",
+ "        blob = bucket.blob(f\"{gcs_prefix}/{relative_name}\")\n",
+ "        blob.upload_from_filename(str(model_file))\n",
+ "\n",
+ "    # Create a model in the registry\n",
+ "    model = aiplatform.Model.upload(\n",
+ "        display_name=f\"{VERTEX_MODEL_NAME_PREFIX}_custom\",\n",
+ "        artifact_uri=f\"gs://{GCP_BUCKET}/{gcs_prefix}\",\n",
+ "        description=VERTEX_MODEL_DESCRIPTION,\n",
+ "        serving_container_image_uri=\"us-docker.pkg.dev/vertex-ai/prediction/tf2-cpu.2-8:latest\"\n",
+ "    )\n",
+ "\n",
+ "    model.wait()\n",
+ "\n",
+ "    # Record the registered model as the output of this training execution\n",
+ "    execution.assign_output_artifacts([model])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Close the custom-model experiment run before the next run is started\n",
+ "aiplatform.end_run()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Create an experiment run using BQML"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Typically, you'd just run a few lines of SQL code in the BigQuery SQL Workspace, but the Python SDK is used here for consistency's sake."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 34,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Associating projects/12345678910/locations/us-central1/metadataStores/default/contexts/isidro-intents-compare-custom-automl-bqml-run-bqml-r1 to Experiment: isidro-intents-compare-custom-automl-bqml\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.metadata.experiment_resources:Associating projects/12345678910/locations/us-central1/metadataStores/default/contexts/isidro-intents-compare-custom-automl-bqml-run-bqml-r1 to Experiment: isidro-intents-compare-custom-automl-bqml\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 34,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Start a separate experiment run for the BQML model (same round suffix as the custom-model run)\n",
+ "aiplatform.start_run(f\"run-bqml-{VERTEX_MODEL_ROUND}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Upload training data to GCS\n",
+ "# A copy of the raw file is placed under the bqml-<round>/ prefix; the BigQuery load job\n",
+ "# in the next cell reads it from that location.\n",
+ "storage_client = storage.Client()\n",
+ "bucket = storage_client.bucket(GCP_BUCKET)\n",
+ "blob = bucket.blob(f\"bqml-{VERTEX_MODEL_ROUND}/{DATA_FILE}\")\n",
+ "blob.upload_from_filename(DATA_FILE) # raw data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Dataset already exists - choose a new name if the dataset is not under your control\n",
+ "Loaded 511 rows.\n"
+ ]
+ }
+ ],
+ "source": [
+ "from google.cloud import bigquery\n",
+ "\n",
+ "# Pin the client to GCP_PROJECT: the dataset below is created under GCP_PROJECT, and the\n",
+ "# later queries reference `{BQ_DATASET}.<name>` without a project, which BigQuery resolves\n",
+ "# against the client's project. Without this, an environment whose default project differs\n",
+ "# from GCP_PROJECT would create the data in one project and query another.\n",
+ "client = bigquery.Client(project=GCP_PROJECT)\n",
+ "\n",
+ "# Create the BigQuery dataset\n",
+ "dataset_id = f\"{GCP_PROJECT}.{BQ_DATASET}\"\n",
+ "table_id = f\"{GCP_PROJECT}.{BQ_DATASET}.raw_data\"\n",
+ "dataset = bigquery.Dataset(dataset_id)\n",
+ "dataset.location = \"US\"\n",
+ "try:\n",
+ "    dataset = client.create_dataset(dataset, timeout=30) # Make an API request.\n",
+ "    print(\"Created dataset {}.{}\".format(client.project, dataset.dataset_id))\n",
+ "except exceptions.Conflict:\n",
+ "    # An existing dataset is reused (this is the normal path when the notebook is re-run)\n",
+ "    print(\"Dataset already exists - choose a new name if the dataset is not under your control\")\n",
+ "\n",
+ "# Import data to the BigQuery dataset\n",
+ "job_config = bigquery.LoadJobConfig(\n",
+ "    schema=[\n",
+ "        bigquery.SchemaField(\"text\", \"STRING\"),\n",
+ "        bigquery.SchemaField(\"intent\", \"STRING\"),\n",
+ "    ],\n",
+ "    skip_leading_rows=1,\n",
+ "    # The source format defaults to CSV, so the line below is optional.\n",
+ "    source_format=bigquery.SourceFormat.CSV,\n",
+ "    # Replace the table contents on every load. Load jobs append by default, so re-running\n",
+ "    # this cell would otherwise duplicate every training row in raw_data.\n",
+ "    write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,\n",
+ ")\n",
+ "uri = f\"gs://{GCP_BUCKET}/bqml-{VERTEX_MODEL_ROUND}/{DATA_FILE}\"\n",
+ "load_job = client.load_table_from_uri(\n",
+ "    uri, table_id, job_config=job_config\n",
+ ") # Make an API request.\n",
+ "load_job.result() # Waits for the job to complete.\n",
+ "\n",
+ "# Confirm the loaded data\n",
+ "destination_table = client.get_table(table_id) # Make an API request.\n",
+ "print(\"Loaded {} rows.\".format(destination_table.num_rows))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 37,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Create the BQML model\n",
+ "# The query lower-cases each text, extracts its [a-z]+ words, expands them into 1- and\n",
+ "# 2-grams, and trains a LOGISTIC_REG model on those n-grams with `intent` as the label.\n",
+ "# 10% of the rows are split off at random for evaluation (data_split_* options).\n",
+ "# NOTE(review): model_registry='vertex_ai' is presumably what makes the model visible to\n",
+ "# aiplatform.Model(...) in a later cell - confirm against the BQML documentation.\n",
+ "MODEL_QUERY = f\"\"\"\n",
+ "CREATE OR REPLACE MODEL\n",
+ " `{BQ_DATASET}.{VERTEX_MODEL_NAME_PREFIX}_bqml`\n",
+ "OPTIONS\n",
+ " (\n",
+ " model_type='LOGISTIC_REG',\n",
+ " auto_class_weights=TRUE,\n",
+ " data_split_method='RANDOM',\n",
+ " data_split_eval_fraction = .10,\n",
+ " input_label_cols=['intent'],\n",
+ " model_registry='vertex_ai'\n",
+ " ) AS\n",
+ "SELECT \n",
+ " ML.NGRAMS(intents.words_array, [1,2]) as ngrams, \n",
+ " intents.intent\n",
+ "FROM (\n",
+ " SELECT \n",
+ " REGEXP_EXTRACT_ALL(LOWER(raw.text), '[a-z]+') as words_array,\n",
+ " raw.intent\n",
+ " FROM `{BQ_DATASET}.raw_data` raw\n",
+ ") intents\n",
+ "\"\"\"\n",
+ "job = client.query(MODEL_QUERY)\n",
+ "# Wait for the CREATE MODEL job to finish before the model is evaluated\n",
+ "job.result()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 38,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Evaluate the BQML model\n",
+ "# ML.EVALUATE is called with the model only (no explicit evaluation table is passed)\n",
+ "EVALUATE_QUERY = f\"\"\"\n",
+ "SELECT accuracy, log_loss FROM ML.EVALUATE(MODEL `{BQ_DATASET}.{VERTEX_MODEL_NAME_PREFIX}_bqml`)\n",
+ "\"\"\"\n",
+ "job = client.query(EVALUATE_QUERY)\n",
+ "rows = job.result()\n",
+ "# Only the first result row is read; its accuracy and log_loss fields are used below\n",
+ "results = next(rows)\n",
+ "\n",
+ "# Experiment run parameters\n",
+ "# The keys mirror those logged for the custom-model run; they are filled with \"N/A\" here\n",
+ "metaparams = {}\n",
+ "metaparams[\"epochs\"] = \"N/A\"\n",
+ "metaparams[\"learning rate\"] = \"N/A\"\n",
+ "metaparams[\"base model\"] = \"N/A\"\n",
+ "aiplatform.log_params(metaparams)\n",
+ "\n",
+ "# Experiment run metrics\n",
+ "# Logged under the same metric keys as the custom-model run\n",
+ "metrics = {}\n",
+ "metrics[\"cross-entropy loss\"] = results.log_loss\n",
+ "metrics[\"categorical accuracy\"] = results.accuracy\n",
+ "aiplatform.log_metrics(metrics)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Add the model to the experiment run\n",
+ "bqml_model_id = f\"{VERTEX_MODEL_NAME_PREFIX}_bqml\"\n",
+ "with aiplatform.start_execution(\n",
+ "    schema_title=\"system.ContainerExecution\",\n",
+ "    display_name=\"training\",\n",
+ ") as execution:\n",
+ "    # Attach the model as an output artifact of the \"training\" execution\n",
+ "    trained_model = aiplatform.Model(model_name=bqml_model_id)\n",
+ "    execution.assign_output_artifacts([trained_model])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 41,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# End the current experiment run before the AutoML run is started in the next section\n",
+ "aiplatform.end_run()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Create an experiment run using AutoML"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Part of the value of AutoML is that it's no-code. The Python below is only for consistency's sake (consider other mechanisms, like the Cloud Console GUI, for easier setup)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 42,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Associating projects/12345678910/locations/us-central1/metadataStores/default/contexts/isidro-intents-compare-custom-automl-bqml-run-automl-r1 to Experiment: isidro-intents-compare-custom-automl-bqml\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.metadata.experiment_resources:Associating projects/12345678910/locations/us-central1/metadataStores/default/contexts/isidro-intents-compare-custom-automl-bqml-run-automl-r1 to Experiment: isidro-intents-compare-custom-automl-bqml\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 42,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Start the experiment run for the AutoML model; the VERTEX_MODEL_ROUND suffix differentiates runs across notebook executions\n",
+ "aiplatform.start_run(f\"run-automl-{VERTEX_MODEL_ROUND}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 43,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Remove the header row from the CSV file\n",
+ "with open(DATA_FILE, 'r') as data_file:\n",
+ "    next(data_file, None)  # Skip the header row; the default avoids StopIteration on an empty file\n",
+ "    headerless_data = data_file.read()  # Read the remainder in one call instead of concatenating per line\n",
+ "\n",
+ "# Upload the headerless data to GCS\n",
+ "storage_client = storage.Client()\n",
+ "bucket = storage_client.bucket(GCP_BUCKET)\n",
+ "bucket.blob(f\"automl-{VERTEX_MODEL_ROUND}/headerless-{DATA_FILE}\").upload_from_string(\n",
+ "    headerless_data, content_type='text/csv'\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 44,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Creating TextDataset\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.datasets.dataset:Creating TextDataset\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Create TextDataset backing LRO: projects/12345678910/locations/us-central1/datasets/7477763187341787136/operations/8187641772936003584\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.datasets.dataset:Create TextDataset backing LRO: projects/12345678910/locations/us-central1/datasets/7477763187341787136/operations/8187641772936003584\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "TextDataset created. Resource name: projects/12345678910/locations/us-central1/datasets/7477763187341787136\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.datasets.dataset:TextDataset created. Resource name: projects/12345678910/locations/us-central1/datasets/7477763187341787136\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "To use this TextDataset in another session:\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.datasets.dataset:To use this TextDataset in another session:\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "ds = aiplatform.TextDataset('projects/12345678910/locations/us-central1/datasets/7477763187341787136')\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.datasets.dataset:ds = aiplatform.TextDataset('projects/12345678910/locations/us-central1/datasets/7477763187341787136')\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Importing TextDataset data: projects/12345678910/locations/us-central1/datasets/7477763187341787136\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.datasets.dataset:Importing TextDataset data: projects/12345678910/locations/us-central1/datasets/7477763187341787136\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Import TextDataset data backing LRO: projects/12345678910/locations/us-central1/datasets/7477763187341787136/operations/2267660062757486592\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.datasets.dataset:Import TextDataset data backing LRO: projects/12345678910/locations/us-central1/datasets/7477763187341787136/operations/2267660062757486592\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "TextDataset data imported. Resource name: projects/12345678910/locations/us-central1/datasets/7477763187341787136\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.datasets.dataset:TextDataset data imported. Resource name: projects/12345678910/locations/us-central1/datasets/7477763187341787136\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "View Training:\n",
+ "https://console.cloud.google.com/ai/platform/locations/us-central1/training/7351014922865606656?project=12345678910\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.training_jobs:View Training:\n",
+ "https://console.cloud.google.com/ai/platform/locations/us-central1/training/7351014922865606656?project=12345678910\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.training_jobs:AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.training_jobs:AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.training_jobs:AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.training_jobs:AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.training_jobs:AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.training_jobs:AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.training_jobs:AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.training_jobs:AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.training_jobs:AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.training_jobs:AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.training_jobs:AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.training_jobs:AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.training_jobs:AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.training_jobs:AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.training_jobs:AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.training_jobs:AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.training_jobs:AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.training_jobs:AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.training_jobs:AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.training_jobs:AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.training_jobs:AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.training_jobs:AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.training_jobs:AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.training_jobs:AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.training_jobs:AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.training_jobs:AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.training_jobs:AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.training_jobs:AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.training_jobs:AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.training_jobs:AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.training_jobs:AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.training_jobs:AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.training_jobs:AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.training_jobs:AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.training_jobs:AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.training_jobs:AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.training_jobs:AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.training_jobs:AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.training_jobs:AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.training_jobs:AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.training_jobs:AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.training_jobs:AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.training_jobs:AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.training_jobs:AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.training_jobs:AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.training_jobs:AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.training_jobs:AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.training_jobs:AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.training_jobs:AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.training_jobs:AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.training_jobs:AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.training_jobs:AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.training_jobs:AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.training_jobs:AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.training_jobs:AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.training_jobs:AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.training_jobs:AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.training_jobs:AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.training_jobs:AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.training_jobs:AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.training_jobs:AutoMLTextTrainingJob projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656 current state:\n",
+ "PipelineState.PIPELINE_STATE_RUNNING\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AutoMLTextTrainingJob run completed. Resource name: projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.training_jobs:AutoMLTextTrainingJob run completed. Resource name: projects/12345678910/locations/us-central1/trainingPipelines/7351014922865606656\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Model available at projects/12345678910/locations/us-central1/models/684373420523126784\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:google.cloud.aiplatform.training_jobs:Model available at projects/12345678910/locations/us-central1/models/684373420523126784\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Display name shared by the managed dataset, the training job, and the trained model\n",
+ "automl_display_name = f\"{VERTEX_MODEL_NAME_PREFIX}_automl\"\n",
+ "\n",
+ "# Create a Managed Dataset from the headerless CSV in GCS (single-label text classification)\n",
+ "text_dataset = aiplatform.TextDataset.create(\n",
+ "    display_name=automl_display_name,\n",
+ "    gcs_source=f\"gs://{GCP_BUCKET}/automl-{VERTEX_MODEL_ROUND}/headerless-{DATA_FILE}\",\n",
+ "    import_schema_uri=aiplatform.schema.dataset.ioformat.text.single_label_classification,\n",
+ ")\n",
+ "\n",
+ "# Fractions of the dataset used for training / validation / test\n",
+ "split_fractions = {\n",
+ "    \"training_fraction_split\": 0.68,\n",
+ "    \"validation_fraction_split\": 0.16,\n",
+ "    \"test_fraction_split\": 0.16,\n",
+ "}\n",
+ "\n",
+ "# Train a single-label AutoML text classification model off the Managed Dataset\n",
+ "job = aiplatform.AutoMLTextTrainingJob(\n",
+ "    display_name=automl_display_name,\n",
+ "    prediction_type=\"classification\",\n",
+ "    multi_label=False,\n",
+ ")\n",
+ "model = job.run(\n",
+ "    dataset=text_dataset,\n",
+ "    model_display_name=automl_display_name,\n",
+ "    **split_fractions,\n",
+ ")\n",
+ "\n",
+ "# Add the model to the experiment run as an output artifact of a 'training' execution\n",
+ "training_execution = aiplatform.start_execution(\n",
+ "    schema_title=\"system.ContainerExecution\",\n",
+ "    display_name=\"training\",\n",
+ ")\n",
+ "with training_execution as execution:\n",
+ "    execution.assign_output_artifacts([model])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 46,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Add experiment run parameters\n",
+ "# This notebook sets none of these for the AutoML run, so each is logged as \"N/A\"\n",
+ "# to keep the same parameter columns as the other runs in the experiment table.\n",
+ "metaparams = {\n",
+ "    \"epochs\": \"N/A\",\n",
+ "    \"learning rate\": \"N/A\",\n",
+ "    \"base model\": \"N/A\",\n",
+ "}\n",
+ "aiplatform.log_params(metaparams)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 47,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Add experiment run metrics\n",
+ "# Read the log loss from the AutoML model's evaluation and log it under the same\n",
+ "# \"cross-entropy loss\" key the other runs use, so the runs can be compared.\n",
+ "model_evaluations = list(model.list_model_evaluations())\n",
+ "if not model_evaluations:\n",
+ "    # Without this guard an empty result would leave log_loss unbound (NameError below)\n",
+ "    raise RuntimeError(\"AutoML model has no evaluations; cannot log cross-entropy loss\")\n",
+ "\n",
+ "# When several evaluations exist, use the last one listed\n",
+ "log_loss = model_evaluations[-1].metrics[\"logLoss\"]\n",
+ "metrics = {\"cross-entropy loss\": log_loss}\n",
+ "aiplatform.log_metrics(metrics)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 48,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Close the active experiment run now that its parameters and metrics are logged\n",
+ "aiplatform.end_run()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Experiment analysis (across custom model, BQML, and AutoML)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 49,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " experiment_name | \n",
+ " run_name | \n",
+ " run_type | \n",
+ " state | \n",
+ " param.epochs | \n",
+ " param.base model | \n",
+ " param.learning rate | \n",
+ " metric.cross-entropy loss | \n",
+ " metric.categorical accuracy | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " isidro-intents-compare-custom-automl-bqml | \n",
+ " run-automl-r1 | \n",
+ " system.ExperimentRun | \n",
+ " COMPLETE | \n",
+ " N/A | \n",
+ " N/A | \n",
+ " N/A | \n",
+ " 0.035308 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " isidro-intents-compare-custom-automl-bqml | \n",
+ " run-bqml-r1 | \n",
+ " system.ExperimentRun | \n",
+ " COMPLETE | \n",
+ " N/A | \n",
+ " N/A | \n",
+ " N/A | \n",
+ " 0.036435 | \n",
+ " 0.981132 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " isidro-intents-compare-custom-automl-bqml | \n",
+ " run-custom-r1 | \n",
+ " system.ExperimentRun | \n",
+ " COMPLETE | \n",
+ " 8.0 | \n",
+ " small_bert/bert_en_uncased_L-8_H-512_A-8 | \n",
+ " 0.00001 | \n",
+ " 0.461158 | \n",
+ " 0.890244 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " experiment_name run_name \\\n",
+ "0 isidro-intents-compare-custom-automl-bqml run-automl-r1 \n",
+ "1 isidro-intents-compare-custom-automl-bqml run-bqml-r1 \n",
+ "2 isidro-intents-compare-custom-automl-bqml run-custom-r1 \n",
+ "\n",
+ " run_type state param.epochs \\\n",
+ "0 system.ExperimentRun COMPLETE N/A \n",
+ "1 system.ExperimentRun COMPLETE N/A \n",
+ "2 system.ExperimentRun COMPLETE 8.0 \n",
+ "\n",
+ " param.base model param.learning rate \\\n",
+ "0 N/A N/A \n",
+ "1 N/A N/A \n",
+ "2 small_bert/bert_en_uncased_L-8_H-512_A-8 0.00001 \n",
+ "\n",
+ " metric.cross-entropy loss metric.categorical accuracy \n",
+ "0 0.035308 NaN \n",
+ "1 0.036435 0.981132 \n",
+ "2 0.461158 0.890244 "
+ ]
+ },
+ "execution_count": 49,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Fetch the experiment runs as a DataFrame, then keep only this notebook's experiment\n",
+ "all_runs_df = aiplatform.get_experiment_df()\n",
+ "is_this_experiment = all_runs_df.experiment_name == EXPERIMENT_NAME\n",
+ "experiment_df = all_runs_df[is_this_experiment]\n",
+ "experiment_df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 50,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[ \n",
+ "resource name: projects/12345678910/locations/us-central1/metadataStores/default/artifacts/09422771-6759-45e5-aeb2-c7155b2b0144\n",
+ "uri: https://us-central1-aiplatform.googleapis.com/v1/projects/12345678910/locations/us-central1/models/1240990186968449024@1\n",
+ "schema_title:google.VertexModel, \n",
+ "resource name: projects/12345678910/locations/us-central1/metadataStores/default/artifacts/7b92f9ec-ef9b-4e17-ad28-b1b4a026f200\n",
+ "uri: gs://isidro_intent_classification/custom-r1/quality.csv\n",
+ "schema_title:system.Dataset, \n",
+ "resource name: projects/12345678910/locations/us-central1/metadataStores/default/artifacts/74feb581-a4a4-4fa5-8575-80cd25142268\n",
+ "uri: gs://isidro_intent_classification/custom-r1/quality.csv\n",
+ "schema_title:system.Dataset, \n",
+ "resource name: projects/12345678910/locations/us-central1/metadataStores/default/artifacts/e922980d-ea02-4fbc-a5b6-d529918211b6\n",
+ "uri: \n",
+ "schema_title:google.ClassificationMetrics]\n",
+ "{'categorical accuracy': 0.8902438879013062, 'cross-entropy loss': 0.4611583352088928}\n",
+ "{'learning rate': 1e-05, 'epochs': 8.0, 'base model': 'small_bert/bert_en_uncased_L-8_H-512_A-8'}\n",
+ "Empty DataFrame\n",
+ "Columns: []\n",
+ "Index: []\n",
+ "[{'id': 'e922980d-ea02-4fbc-a5b6-d529918211b6', 'display_name': 'intents confusion matrix', 'labels': ['destroy cron', 'create cron', 'self scale', 'report issue', 'create windows', 'self destruct', 'create workstation', 'documentation'], 'matrix': [[5.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 5.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 17.0, 0.0, 0.0, 1.0, 0.0, 0.0], [0.0, 0.0, 1.0, 12.0, 0.0, 1.0, 0.0, 1.0], [0.0, 0.0, 0.0, 0.0, 5.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 10.0, 0.0, 1.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 7.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 14.0]]}]\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Look up the custom-model run of this round within the experiment\n",
+ "experiment_run = aiplatform.ExperimentRun(\n",
+ "    run_name=f\"run-custom-{VERTEX_MODEL_ROUND}\",\n",
+ "    experiment=EXPERIMENT_NAME,\n",
+ ")\n",
+ "\n",
+ "# Print everything recorded on the run, one getter at a time, in a fixed order\n",
+ "run_getters = (\n",
+ "    experiment_run.get_artifacts,\n",
+ "    experiment_run.get_metrics,\n",
+ "    experiment_run.get_params,\n",
+ "    experiment_run.get_time_series_data_frame,\n",
+ "    experiment_run.get_classification_metrics,\n",
+ ")\n",
+ "for getter in run_getters:\n",
+ "    print(getter())"
]
}
],
diff --git a/intents/service/requirements.txt b/intents/service/requirements.txt
index f1db98f..65a85d5 100644
--- a/intents/service/requirements.txt
+++ b/intents/service/requirements.txt
@@ -1,4 +1,5 @@
flask==2.2.2
+google-cloud-aiplatform==1.18.3
gunicorn==20.1.0
tensorflow==2.8.4
tensorflow_hub==0.12.0