From e811c62781b2c39d562830a8e71393531a2603f5 Mon Sep 17 00:00:00 2001
From: BlackMagicKau <65220482+BlackMagicKau@users.noreply.github.com>
Date: Thu, 13 Jul 2023 15:44:10 +0800
Subject: [PATCH] Dev (#483)

* vertex ai adapter

Signed-off-by: BlackMagicKau <kyadunath@gmail.com>

* Vertex AI caching showcase

Signed-off-by: BlackMagicKau <kyadunath@gmail.com>

* added index.rst and and modified bak file

Signed-off-by: BlackMagicKau <kyadunath@gmail.com>

---------

Signed-off-by: BlackMagicKau <kyadunath@gmail.com>
---
 docs/bootcamp/vertex/index.rst              |   9 +
 docs/bootcamp/vertex/vertexai_caching.ipynb | 509 ++++++++++++++++++++
 docs/toc.bak                                |   1 +
 3 files changed, 519 insertions(+)
 create mode 100644 docs/bootcamp/vertex/index.rst
 create mode 100644 docs/bootcamp/vertex/vertexai_caching.ipynb

diff --git a/docs/bootcamp/vertex/index.rst b/docs/bootcamp/vertex/index.rst
new file mode 100644
index 00000000..87b53d96
--- /dev/null
+++ b/docs/bootcamp/vertex/index.rst
@@ -0,0 +1,9 @@
+Vertex
+=================
+
+.. toctree::
+   :maxdepth: 1
+   :caption: Contents:
+
+
+   vertexai_caching
\ No newline at end of file
diff --git a/docs/bootcamp/vertex/vertexai_caching.ipynb b/docs/bootcamp/vertex/vertexai_caching.ipynb
new file mode 100644
index 00000000..b049bd5d
--- /dev/null
+++ b/docs/bootcamp/vertex/vertexai_caching.ipynb
@@ -0,0 +1,509 @@
+{
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "id": "6b3ba1cc",
+      "metadata": {
+        "id": "6b3ba1cc"
+      },
+      "source": [
+        "# Caching chat response\n",
+        "\n",
+        "This notebook is to show you how to use Vertex AI to answer questions and teach you how to cache the  response for exact and similar matches with **gptcache**. It is relatively simple, you just need to add an extra step to initialize the cache.\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "!pip install google-cloud-aiplatform"
+      ],
+      "metadata": {
+        "id": "4aAjAIFCzny5"
+      },
+      "id": "4aAjAIFCzny5",
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "! pip install -q gptcache langchain"
+      ],
+      "metadata": {
+        "id": "CRm8Zv2O_Hlp",
+        "outputId": "b999183d-f660-4622-c859-cab41c544770",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 829
+        }
+      },
+      "id": "CRm8Zv2O_Hlp",
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Collecting google-cloud-aiplatform\n",
+            "  Downloading google_cloud_aiplatform-1.27.0-py2.py3-none-any.whl (2.6 MB)\n",
+            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.6/2.6 MB\u001b[0m \u001b[31m39.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+            "\u001b[?25hRequirement already satisfied: google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0 in /usr/local/lib/python3.10/dist-packages (from google-cloud-aiplatform) (2.11.1)\n",
+            "Requirement already satisfied: proto-plus<2.0.0dev,>=1.22.0 in /usr/local/lib/python3.10/dist-packages (from google-cloud-aiplatform) (1.22.3)\n",
+            "Requirement already satisfied: protobuf!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.19.5 in /usr/local/lib/python3.10/dist-packages (from google-cloud-aiplatform) (3.20.3)\n",
+            "Requirement already satisfied: packaging>=14.3 in /usr/local/lib/python3.10/dist-packages (from google-cloud-aiplatform) (23.1)\n",
+            "Requirement already satisfied: google-cloud-storage<3.0.0dev,>=1.32.0 in /usr/local/lib/python3.10/dist-packages (from google-cloud-aiplatform) (2.8.0)\n",
+            "Requirement already satisfied: google-cloud-bigquery<4.0.0dev,>=1.15.0 in /usr/local/lib/python3.10/dist-packages (from google-cloud-aiplatform) (3.10.0)\n",
+            "Collecting google-cloud-resource-manager<3.0.0dev,>=1.3.3 (from google-cloud-aiplatform)\n",
+            "  Downloading google_cloud_resource_manager-1.10.2-py2.py3-none-any.whl (321 kB)\n",
+            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m321.3/321.3 kB\u001b[0m \u001b[31m32.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+            "\u001b[?25hCollecting shapely<2.0.0 (from google-cloud-aiplatform)\n",
+            "  Downloading Shapely-1.8.5.post1-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (2.0 MB)\n",
+            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m65.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+            "\u001b[?25hRequirement already satisfied: googleapis-common-protos<2.0.dev0,>=1.56.2 in /usr/local/lib/python3.10/dist-packages (from google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-cloud-aiplatform) (1.59.1)\n",
+            "Requirement already satisfied: google-auth<3.0.dev0,>=2.14.1 in /usr/local/lib/python3.10/dist-packages (from google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-cloud-aiplatform) (2.17.3)\n",
+            "Requirement already satisfied: requests<3.0.0.dev0,>=2.18.0 in /usr/local/lib/python3.10/dist-packages (from google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-cloud-aiplatform) (2.27.1)\n",
+            "Requirement already satisfied: grpcio<2.0dev,>=1.33.2 in /usr/local/lib/python3.10/dist-packages (from google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-cloud-aiplatform) (1.56.0)\n",
+            "Requirement already satisfied: grpcio-status<2.0.dev0,>=1.33.2 in /usr/local/lib/python3.10/dist-packages (from google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-cloud-aiplatform) (1.48.2)\n",
+            "Requirement already satisfied: google-cloud-core<3.0.0dev,>=1.6.0 in /usr/local/lib/python3.10/dist-packages (from google-cloud-bigquery<4.0.0dev,>=1.15.0->google-cloud-aiplatform) (2.3.2)\n",
+            "Requirement already satisfied: google-resumable-media<3.0dev,>=0.6.0 in /usr/local/lib/python3.10/dist-packages (from google-cloud-bigquery<4.0.0dev,>=1.15.0->google-cloud-aiplatform) (2.5.0)\n",
+            "Requirement already satisfied: python-dateutil<3.0dev,>=2.7.2 in /usr/local/lib/python3.10/dist-packages (from google-cloud-bigquery<4.0.0dev,>=1.15.0->google-cloud-aiplatform) (2.8.2)\n",
+            "Requirement already satisfied: grpc-google-iam-v1<1.0.0dev,>=0.12.4 in /usr/local/lib/python3.10/dist-packages (from google-cloud-resource-manager<3.0.0dev,>=1.3.3->google-cloud-aiplatform) (0.12.6)\n",
+            "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from google-auth<3.0.dev0,>=2.14.1->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-cloud-aiplatform) (5.3.1)\n",
+            "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.10/dist-packages (from google-auth<3.0.dev0,>=2.14.1->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-cloud-aiplatform) (0.3.0)\n",
+            "Requirement already satisfied: six>=1.9.0 in /usr/local/lib/python3.10/dist-packages (from google-auth<3.0.dev0,>=2.14.1->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-cloud-aiplatform) (1.16.0)\n",
+            "Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.10/dist-packages (from google-auth<3.0.dev0,>=2.14.1->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-cloud-aiplatform) (4.9)\n",
+            "Requirement already satisfied: google-crc32c<2.0dev,>=1.0 in /usr/local/lib/python3.10/dist-packages (from google-resumable-media<3.0dev,>=0.6.0->google-cloud-bigquery<4.0.0dev,>=1.15.0->google-cloud-aiplatform) (1.5.0)\n",
+            "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0.dev0,>=2.18.0->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-cloud-aiplatform) (1.26.16)\n",
+            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0.dev0,>=2.18.0->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-cloud-aiplatform) (2023.5.7)\n",
+            "Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0.dev0,>=2.18.0->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-cloud-aiplatform) (2.0.12)\n",
+            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0.dev0,>=2.18.0->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-cloud-aiplatform) (3.4)\n",
+            "Requirement already satisfied: pyasn1<0.6.0,>=0.4.6 in /usr/local/lib/python3.10/dist-packages (from pyasn1-modules>=0.2.1->google-auth<3.0.dev0,>=2.14.1->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-cloud-aiplatform) (0.5.0)\n",
+            "Installing collected packages: shapely, google-cloud-resource-manager, google-cloud-aiplatform\n",
+            "  Attempting uninstall: shapely\n",
+            "    Found existing installation: shapely 2.0.1\n",
+            "    Uninstalling shapely-2.0.1:\n",
+            "      Successfully uninstalled shapely-2.0.1\n",
+            "Successfully installed google-cloud-aiplatform-1.27.0 google-cloud-resource-manager-1.10.2 shapely-1.8.5.post1\n"
+          ]
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "application/vnd.colab-display-data+json": {
+              "pip_warning": {
+                "packages": [
+                  "google"
+                ]
+              }
+            }
+          },
+          "metadata": {}
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "#Authenticating and testing the VertexAI model"
+      ],
+      "metadata": {
+        "id": "NAcKaXoPH_-F"
+      },
+      "id": "NAcKaXoPH_-F"
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from google.colab import auth as google_auth\n",
+        "google_auth.authenticate_user()\n",
+        "\n",
+        "import vertexai\n",
+        "from vertexai.preview.language_models import TextGenerationModel\n",
+        "\n",
+        "def predict_large_language_model_sample(\n",
+        "    project_id: str,\n",
+        "    model_name: str,\n",
+        "    temperature: float,\n",
+        "    max_decode_steps: int,\n",
+        "    top_p: float,\n",
+        "    top_k: int,\n",
+        "    content: str,\n",
+        "    location: str = \"us-central1\",\n",
+        "    tuned_model_name: str = \"\",\n",
+        "    ) :\n",
+        "    \"\"\"Predict using a Large Language Model.\"\"\"\n",
+        "    vertexai.init(project=project_id, location=location)\n",
+        "    model = TextGenerationModel.from_pretrained(model_name)\n",
+        "    if tuned_model_name:\n",
+        "      model = model.get_tuned_model(tuned_model_name)\n",
+        "    response = model.predict(\n",
+        "        content,\n",
+        "        temperature=temperature,\n",
+        "        max_output_tokens=max_decode_steps,\n",
+        "        top_k=top_k,\n",
+        "        top_p=top_p,)\n",
+        "    print(f\"Response from Model: {response.text}\")\n",
+        "predict_large_language_model_sample(\"octo-t2sql\", \"text-bison@001\", 0.2, 256, 0.8, 40, '''Give me ten interview questions for the role of software engineer''', \"us-central1\")"
+      ],
+      "metadata": {
+        "id": "Ahd3hPeI_IJ6",
+        "outputId": "340527c7-0128-42b6-9704-30b273258ce3",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        }
+      },
+      "id": "Ahd3hPeI_IJ6",
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Response from Model: 1. What is your experience with project management?\n",
+            "2. What is your process for managing a project?\n",
+            "3. How do you handle unexpected challenges or roadblocks?\n",
+            "4. How do you communicate with stakeholders?\n",
+            "5. How do you measure the success of a project?\n",
+            "6. What are your strengths and weaknesses as a project manager?\n",
+            "7. What are your salary expectations?\n",
+            "8. What are your career goals?\n",
+            "9. What are your thoughts on the company's culture?\n",
+            "10. Why are you interested in this position?\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "Before running the example, make sure the first parameter of `predict_large_language_model_sample` is corresponding to your `project_id`. You will be prompted to authenticate."
+      ],
+      "metadata": {
+        "id": "WxCz_TiwzwbO"
+      },
+      "id": "WxCz_TiwzwbO"
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "Then we can learn the usage and acceleration effect of gptcache with the following code, which consists of three parts:\n",
+        "\n",
+        "1.   Usual way\n",
+        "2.   Exact Search\n",
+        "3.   Similar Search\n",
+        "\n",
+        "\n"
+      ],
+      "metadata": {
+        "id": "_hzEQ9T90AOO"
+      },
+      "id": "_hzEQ9T90AOO"
+    },
+    {
+      "cell_type": "markdown",
+      "id": "aa0ba70e",
+      "metadata": {
+        "id": "aa0ba70e"
+      },
+      "source": [
+        "## VertexAI API standard usage"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "80e9dae2",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "80e9dae2",
+        "outputId": "99a971d2-55bf-4015-fb15-e1c96701d217"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Response from Model: GitHub is a web-based hosting service for software development projects that use the Git revision control system. It offers all of the distributed version control and source code management (SCM) functionality of Git, as well as a graphical user interface (GUI) and web interface that make it easy to manage projects with multiple collaborators.\n",
+            "\n",
+            "GitHub is used by many open source projects, as well as by private companies for software development. It is also used by individuals for personal projects.\n",
+            "\n",
+            "GitHub is a popular choice for software development because it is easy to use, reliable, and secure. It also offers a number of features that make it a good choice for collaboration, including issue tracking, pull requests, and wikis.\n",
+            "\n",
+            "If you are interested in learning more about GitHub, there are a number of resources available online. The GitHub website has a comprehensive help section, and there are also a number of books and articles available on the subject.\n",
+            "Question: what‘s github?\n",
+            "Time consuming: 2.87s\n"
+          ]
+        }
+      ],
+      "source": [
+        "import time\n",
+        "\n",
+        "# def response_text(vertexai_resp):\n",
+        "#     return vertexai_resp['choices'][0]['message']['content']\n",
+        "\n",
+        "\n",
+        "question = 'what‘s github?'\n",
+        "\n",
+        "# VertexAI API original usage\n",
+        "start_time = time.time()\n",
+        "response = predict_large_language_model_sample(\"octo-t2sql\", \"text-bison@001\", 0.2, 256, 0.8, 40, question, \"us-central1\")\n",
+        "\n",
+        "print(f'Question: {question}')\n",
+        "print(\"Time consuming: {:.2f}s\".format(time.time() - start_time))\n",
+        "# print(f'Answer: {response_text(response)}\\n')"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "9d871550",
+      "metadata": {
+        "id": "9d871550"
+      },
+      "source": [
+        "## VertexAI API + GPTCache using LangChain 🦜️🔗 (exact match cache)\n",
+        "\n",
+        "Initalize the cache to run GPTCache and import `LangChainLLMs` from `gptcache.adapter.langchain_models`, which will automatically set the map data manager to match the exact cahe, more details refer to [build your cache](https://gptcache.readthedocs.io/en/dev/usage.html#build-your-cache).\n",
+        "\n",
+        "And if you ask the exact same two questions, the answer to the second question will be obtained from the cache without requesting the model again."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "024484f3",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "024484f3",
+        "outputId": "3dff3ee8-8acc-43ab-c27a-7ff2b4286ac3"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "The New England Patriots won Super Bowl XXXIX in 2005, the year Justin Bieber was born.\n",
+            "Read through Time Spent = 0.0011386871337890625\n",
+            "The New England Patriots won Super Bowl XXXIX in 2005, the year Justin Bieber was born.\n",
+            "Cache Hit Time Spent = 0.0007178783416748047\n"
+          ]
+        }
+      ],
+      "source": [
+        "import time\n",
+        "from langchain import SQLDatabase, SQLDatabaseChain\n",
+        "from langchain.llms import VertexAI\n",
+        "from langchain import PromptTemplate, LLMChain\n",
+        "\n",
+        "\n",
+        "\n",
+        "# the following initialises the cache\n",
+        "# -------------------------------------------------\n",
+        "from gptcache.adapter.langchain_models import LangChainLLMs\n",
+        "from gptcache import Cache\n",
+        "from gptcache.processor.pre import get_prompt\n",
+        "\n",
+        "llm = VertexAI()\n",
+        "\n",
+        "\n",
+        "llm_cache = Cache()\n",
+        "llm_cache.init(\n",
+        "    pre_embedding_func=get_prompt,\n",
+        ")\n",
+        "\n",
+        "cached_llm = LangChainLLMs(llm=llm)\n",
+        "answer = cached_llm(prompt=question, cache_obj=llm_cache)\n",
+        "# -------------------------------------------------\n",
+        "\n",
+        "# before = time.time()\n",
+        "# print(answer)\n",
+        "# print(\"Read through Time Spent =\", time.time() - before)\n",
+        "\n",
+        "# before = time.time()\n",
+        "# answer = cached_llm(prompt=question, cache_obj=llm_cache)\n",
+        "# print(answer)\n",
+        "# print(\"Cache Hit Time Spent =\", time.time() - before)\n",
+        "\n",
+        "question = \"What NFL team won the Super Bowl in the year Justin Bieber was born?\"\n",
+        "\n",
+        "before = time.time()\n",
+        "print(answer)\n",
+        "print(\"Read through Time Spent =\", time.time() - before)\n",
+        "\n",
+        "before = time.time()\n",
+        "answer = cached_llm(prompt=question, cache_obj=llm_cache)\n",
+        "print(answer)\n",
+        "print(\"Cache Hit Time Spent =\", time.time() - before)\n",
+        "\n",
+        "# for _ in range(2):\n",
+        "#     start_time = time.time()\n",
+        "#     response = predict_large_language_model_sample(\"octo-t2sql\", \"text-bison@001\", 0.2, 256, 0.8, 40, question, \"us-central1\")\n",
+        "#     print(f'Question: {question}')\n",
+        "#     print(\"Time consuming: {:.2f}s\".format(time.time() - start_time))\n",
+        "    # print(f'Answer: {response_text(response)}\\n')"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "6f2ff699",
+      "metadata": {
+        "id": "6f2ff699"
+      },
+      "source": [
+        "## VertexAI API + GPTCache, similar search cache\n",
+        "\n",
+        "Set the cache with `embedding_func` to generate embedding for the text, and `data_manager` to manager the cache data, `similarity_evaluation` to evaluate the similarities, more details refer to [build your cache](https://gptcache.readthedocs.io/en/dev/usage.html#build-your-cache).\n",
+        "\n",
+        "After obtaining an answer in response to several similar questions, the answers to subsequent questions can be retrieved from the cache without the need to request the model again."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "How similar search works:\n",
+        "\n",
+        "*   Similarity evaluator collects data from **Cache Storage and Vector Store** to determine similarity between input request and Vector Store requests\n",
+        "*   Request Router returns request that is most similar to input request from cache\n",
+        "\n"
+      ],
+      "metadata": {
+        "id": "Pe8Qz7pqBcNa"
+      },
+      "id": "Pe8Qz7pqBcNa"
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "fd1ff06e",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "fd1ff06e",
+        "outputId": "5c108328-1f77-47b7-a7a6-c641153493d9"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Cache loading.....\n",
+            "Response from Model: GitHub is a web-based hosting service for software development projects that use the Git revision control system. It offers all of the distributed version control and source code management (SCM) functionality of Git, as well as a graphical user interface (GUI) and web interface, making it easy for teams to collaborate on software projects.\n",
+            "\n",
+            "GitHub is used by many large organizations, including Google, Facebook, Amazon, and Microsoft. It is also popular with open source projects, such as the Linux kernel and the Apache web server.\n",
+            "\n",
+            "GitHub is free for open source projects, but there is a paid subscription option for private projects. The paid subscription offers additional features, such as unlimited private repositories, priority support, and the ability to host private wikis and blogs.\n",
+            "Question: what's github\n",
+            "Time consuming: 2.41s\n",
+            "Response from Model: GitHub is a web-based hosting service for software development projects that use the Git revision control system. It offers all of the distributed version control and source code management (SCM) functionality of Git, as well as a graphical user interface (GUI) and web interface, making it easy for teams to collaborate on software projects.\n",
+            "\n",
+            "GitHub is used by many open source projects, as well as by private companies for software development. It is also used by many educational institutions for teaching software engineering.\n",
+            "\n",
+            "GitHub is a popular choice for software development because it is easy to use, has a large community of users, and offers a variety of features that make it well-suited for collaboration.\n",
+            "\n",
+            "Here are some of the benefits of using GitHub:\n",
+            "\n",
+            "* It is easy to use. GitHub has a simple and intuitive interface that makes it easy for developers of all levels of experience to use.\n",
+            "* It has a large community of users. GitHub has a large community of users who are willing to help each other out. This can be a valuable resource for developers who are stuck on a problem.\n",
+            "* It offers a variety of features. GitHub offers a variety of features that make it well-suited for collaboration, including issue tracking, pull requests, and code review.\n",
+            "\n",
+            "Question: can you explain what GitHub is\n",
+            "Time consuming: 2.95s\n",
+            "Response from Model: GitHub is a web-based hosting service for software development projects that use the Git revision control system. It offers all of the distributed version control and source code management (SCM) functionality of Git, as well as a graphical user interface (GUI) and web interface, making it easy for teams to collaborate on software projects.\n",
+            "\n",
+            "GitHub is used by many open source projects, as well as by private companies for software development. It is also used by many educational institutions for teaching software engineering.\n",
+            "\n",
+            "GitHub is free for open source projects, and has a paid subscription service for private projects. The paid service offers additional features, such as private repositories, unlimited collaborators, and support.\n",
+            "\n",
+            "GitHub is a popular choice for software development because it is easy to use, has a large community of users, and offers a variety of features that make it well-suited for collaboration.\n",
+            "Question: can you tell me more about GitHub\n",
+            "Time consuming: 2.80s\n",
+            "Response from Model: GitHub is a web-based hosting service for software development projects that use the Git revision control system. It offers all of the distributed version control and source code management (SCM) functionality of Git, as well as a number of additional features such as issue tracking, project management, wikis, and code review.\n",
+            "\n",
+            "GitHub is used by many open source projects, as well as by private companies for software development. It is also a popular platform for hosting personal projects.\n",
+            "\n",
+            "GitHub is free for open source projects and for private projects with fewer than five users. For private projects with more than five users, GitHub offers a paid subscription plan.\n",
+            "\n",
+            "GitHub is a powerful tool for software development. It can help teams to collaborate more effectively, track changes to code, and manage projects. It is also a great platform for hosting personal projects.\n",
+            "Question: what is the purpose of GitHub\n",
+            "Time consuming: 2.79s\n"
+          ]
+        }
+      ],
+      "source": [
+        "import time\n",
+        "\n",
+        "\n",
+        "# def response_text(vertexai_resp):\n",
+        "#     return vertexai_resp['choices'][0]['message']['content']\n",
+        "\n",
+        "from gptcache import cache\n",
+        "from langchain import SQLDatabase, SQLDatabaseChain\n",
+        "from gptcache.embedding import Onnx\n",
+        "from gptcache.manager import CacheBase, VectorBase, get_data_manager\n",
+        "from gptcache.similarity_evaluation.distance import SearchDistanceEvaluation\n",
+        "\n",
+        "print(\"Cache loading.....\")\n",
+        "\n",
+        "onnx = Onnx()\n",
+        "data_manager = get_data_manager(CacheBase(\"sqlite\"), VectorBase(\"faiss\", dimension=onnx.dimension))\n",
+        "cache.init(\n",
+        "    embedding_func=onnx.to_embeddings,\n",
+        "    data_manager=data_manager,\n",
+        "    similarity_evaluation=SearchDistanceEvaluation(),\n",
+        "    )\n",
+        "\n",
+        "\n",
+        "questions = [\n",
+        "    \"what's github\",\n",
+        "    \"can you explain what GitHub is\",\n",
+        "    \"can you tell me more about GitHub\",\n",
+        "    \"what is the purpose of GitHub\"\n",
+        "]\n",
+        "\n",
+        "for question in questions:\n",
+        "    start_time = time.time()\n",
+        "    response = predict_large_language_model_sample(\"octo-t2sql\", \"text-bison@001\", 0.2, 256, 0.8, 40, question, \"us-central1\")\n",
+        "    print(f'Question: {question}')\n",
+        "    print(\"Time consuming: {:.2f}s\".format(time.time() - start_time))\n",
+        "\n",
+        "  # before = time.time()\n",
+        "  # print(answer)\n",
+        "  # print(\"Read through Time Spent =\", time.time() - before)\n",
+        "\n",
+        "  # before = time.time()\n",
+        "  # answer = cached_llm(prompt=question, cache_obj=llm_cache)\n",
+        "  # print(answer)\n",
+        "  # print(\"Cache Hit Time Spent =\", time.time() - before)\n",
+        "\n"
+      ]
+    }
+  ],
+  "metadata": {
+    "kernelspec": {
+      "display_name": "Python 3 (ipykernel)",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.9.12"
+    },
+    "colab": {
+      "provenance": []
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 5
+}
\ No newline at end of file
diff --git a/docs/toc.bak b/docs/toc.bak
index 376e1b00..9d24a068 100644
--- a/docs/toc.bak
+++ b/docs/toc.bak
@@ -20,6 +20,7 @@
    bootcamp/openai/index
    bootcamp/replicate/index
    bootcamp/temperature/index
+   bootcamp/vertex/index
 
 .. toctree::
    :maxdepth: 1