Merge branch 'main' into example/vllm-model-deployer
safoinme authored Dec 5, 2024
2 parents 1c31571 + 1b90127 commit 5f091e2
Showing 36 changed files with 954 additions and 780 deletions.
16 changes: 8 additions & 8 deletions .github/workflows/production_run_complete_llm.yml
@@ -11,11 +11,11 @@ concurrency:
cancel-in-progress: true

jobs:
- run-staging-workflow:
+ run-production-workflow:
runs-on: ubuntu-latest
env:
- ZENML_HOST: ${{ secrets.ZENML_PROJECTS_HOST }}
- ZENML_API_KEY: ${{ secrets.ZENML_PROJECTS_API_KEY }}
+ ZENML_STORE_URL: ${{ secrets.ZENML_PROJECTS_HOST }}
+ ZENML_STORE_API_KEY: ${{ secrets.ZENML_PROJECTS_API_KEY }}
ZENML_PRODUCTION_STACK: b3951d43-0fb2-4d32-89c5-3399374e7c7e # Set this to your production stack ID
ZENML_GITHUB_SHA: ${{ github.event.pull_request.head.sha }}
ZENML_GITHUB_URL_PR: ${{ github.event.pull_request._links.html.href }}
@@ -38,15 +38,15 @@ jobs:
- name: Install requirements
working-directory: ./llm-complete-guide
run: |
- pip3 install -r requirements.txt
- pip3 install -r requirements-argilla.txt
- zenml integration install gcp -y
+ pip3 install uv
+ uv pip install -r requirements.txt --system
+ uv pip install -r requirements-argilla.txt --system
+ zenml integration install gcp -y --uv
- name: Connect to ZenML server
working-directory: ./llm-complete-guide
run: |
zenml init
- zenml connect --url $ZENML_HOST --api-key $ZENML_API_KEY
- name: Set stack (Production)
working-directory: ./llm-complete-guide
@@ -56,4 +56,4 @@ jobs:
- name: Run pipeline, create pipeline, configure trigger (Production)
working-directory: ./llm-complete-guide
run: |
- python gh_action_rag.py --no-cache --create-template ----event-source-id --service-account-id ${{ env.ZENML_SERVICE_ACCOUNT_ID }} --action-id ${{ env.ZENML_ACTION_ID }} --config rag_gcp.yaml
+ python gh_action_rag.py --no-cache --create-template --event-source-id ${{ env.ZENML_EVENT_SOURCE_ID }} --service-account-id ${{ env.ZENML_SERVICE_ACCOUNT_ID }} --config production/rag.yaml --zenml-model-version production
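The new invocation passes the event source, service account, config path, and model version explicitly. As an illustration only, a parser accepting these flags could look like the sketch below; the flag names mirror the workflow call above, but the real `gh_action_rag.py` may define its CLI differently (e.g. with click):

```python
import argparse

def build_parser() -> argparse.ArgumentParser:
    # Hypothetical parser mirroring the flags used in the workflow step;
    # the actual gh_action_rag.py implementation may differ.
    parser = argparse.ArgumentParser(prog="gh_action_rag.py")
    parser.add_argument("--no-cache", action="store_true")
    parser.add_argument("--create-template", action="store_true")
    parser.add_argument("--event-source-id")
    parser.add_argument("--service-account-id")
    parser.add_argument("--config", default="production/rag.yaml")
    parser.add_argument("--zenml-model-version", default="production")
    return parser

# Placeholder IDs stand in for the repository's env values.
args = build_parser().parse_args([
    "--no-cache",
    "--create-template",
    "--event-source-id", "evt-123",
    "--service-account-id", "sa-456",
    "--config", "production/rag.yaml",
    "--zenml-model-version", "production",
])
print(args.no_cache, args.config, args.zenml_model_version)
```

Note that the original line contained a malformed `----event-source-id` flag with no value, which this commit corrects.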
14 changes: 7 additions & 7 deletions .github/workflows/staging_run_complete_llm.yml
@@ -12,8 +12,8 @@ jobs:
run-staging-workflow:
runs-on: ubuntu-latest
env:
- ZENML_HOST: ${{ secrets.ZENML_PROJECTS_HOST }}
- ZENML_API_KEY: ${{ secrets.ZENML_PROJECTS_API_KEY }}
+ ZENML_STORE_URL: ${{ secrets.ZENML_PROJECTS_HOST }}
+ ZENML_STORE_API_KEY: ${{ secrets.ZENML_PROJECTS_API_KEY }}
ZENML_STAGING_STACK : 67166d73-a44e-42f9-b67f-011e9afab9b5 # Set this to your staging stack ID
ZENML_GITHUB_SHA: ${{ github.event.pull_request.head.sha }}
ZENML_GITHUB_URL_PR: ${{ github.event.pull_request._links.html.href }}
@@ -34,15 +34,15 @@ jobs:
- name: Install requirements
working-directory: ./llm-complete-guide
run: |
- pip3 install -r requirements.txt
- pip3 install -r requirements-argilla.txt
- zenml integration install aws s3 -y
+ pip3 install uv
+ uv pip install -r requirements.txt --system
+ uv pip install -r requirements-argilla.txt --system
+ zenml integration install aws s3 -y --uv
- name: Connect to ZenML server
working-directory: ./llm-complete-guide
run: |
zenml init
- zenml connect --url $ZENML_HOST --api-key $ZENML_API_KEY
- name: Set stack (Staging)
working-directory: ./llm-complete-guide
@@ -52,4 +52,4 @@ jobs:
- name: Run pipeline (Staging)
working-directory: ./llm-complete-guide
run: |
- python gh_action_rag.py --no-cache --config rag_local_dev.yaml
+ python gh_action_rag.py --no-cache --config staging/rag.yaml --zenml-model-version staging
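Both workflows now export `ZENML_STORE_URL` and `ZENML_STORE_API_KEY` and drop the explicit `zenml connect` step, since the ZenML client picks up these environment variables on its own. A rough sketch of the idea, with placeholder values standing in for the repository secrets:

```python
import os

# Placeholder values standing in for the GitHub Actions secrets; with
# these set, recent ZenML versions connect to the server automatically
# and no interactive `zenml connect` call is needed.
os.environ["ZENML_STORE_URL"] = "https://zenml.example.com"
os.environ["ZENML_STORE_API_KEY"] = "zen_api_placeholder"

def store_configured_via_env() -> bool:
    """Rough equivalent of the check a client could make before
    falling back to an explicit connect step."""
    return bool(os.environ.get("ZENML_STORE_URL")) and bool(
        os.environ.get("ZENML_STORE_API_KEY")
    )

print(store_configured_via_env())  # → True
```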
14 changes: 7 additions & 7 deletions llm-complete-guide/README.md
@@ -23,7 +23,7 @@ instructions are provided below for how to set that up.

## 📽️ Watch the webinars

- We've recently been holding some webinars about this repository and project. Watche the videos below if you want an introduction and context around the code and ideas covered in this project.
+ We've recently been holding some webinars about this repository and project. Watch the videos below if you want an introduction and context around the code and ideas covered in this project.

[![Building and Optimizing RAG Pipelines: Data Preprocessing, Embeddings, and Evaluation with ZenML](https://github.com/user-attachments/assets/1aea2bd4-8079-4ea2-98e1-8da6ba9aeebe)](https://www.youtube.com/watch?v=PazRMY8bo3U)

@@ -45,7 +45,7 @@ pip install -r requirements.txt

Depending on your hardware you may run into some issues when running the `pip install` command with the
`flash_attn` package. In that case running `FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE pip install flash-attn --no-build-isolation`
- could help you.
+ could help you. Possibly you might also need to install torch separately.

In order to use the default LLM for this query, you'll need an account and an
API key from OpenAI specified as a ZenML secret:
@@ -85,7 +85,7 @@ to run the pipelines in the correct order. You can run the script with the
following command:

```shell
- python run.py --rag
+ python run.py rag
```

This will run the basic RAG pipeline, which scrapes the ZenML documentation and
@@ -100,7 +100,7 @@ use for the LLM.
When you're ready to make the query, run the following command:

```shell
- python run.py --query "how do I use a custom materializer inside my own zenml steps? i.e. how do I set it? inside the @step decorator?" --model=gpt4
+ python run.py query "how do I use a custom materializer inside my own zenml steps? i.e. how do I set it? inside the @step decorator?" --model=gpt4
```
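The README changes above migrate `run.py` from flag-style invocations (`--rag`, `--query`) to subcommands (`rag`, `query`). Purely as an illustration of that pattern (the actual `run.py` may be built differently, e.g. with click), a subcommand-style CLI can be sketched with argparse:

```python
import argparse

# Illustrative subcommand-style CLI, mirroring the new
# `python run.py rag` / `python run.py query ...` invocations.
parser = argparse.ArgumentParser(prog="run.py")
sub = parser.add_subparsers(dest="command", required=True)
sub.add_parser("rag")                      # `run.py rag` takes no extra args
query = sub.add_parser("query")
query.add_argument("question")             # free-text question, positional
query.add_argument("--model", default="gpt4")

args = parser.parse_args(
    ["query", "how do I set a custom materializer?", "--model=gpt4"]
)
print(args.command, args.model)  # → query gpt4
```

Subcommands make mutually exclusive modes explicit, whereas boolean flags like `--rag --query` could previously be combined nonsensically.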

Alternative options for LLMs to use include:
@@ -164,7 +164,7 @@ the RAG pipeline.
To run the evaluation pipeline, you can use the following command:

```shell
- python run.py --evaluation
+ python run.py evaluation
```

You'll need to have first run the RAG pipeline to have the necessary assets in
@@ -182,7 +182,7 @@ To run the `distilabel` synthetic data generation pipeline, you can use the foll

```shell
pip install -r requirements-argilla.txt # special requirements
- python run.py --synthetic
+ python run.py synthetic
```

You will also need to have set up and connected to an Argilla instance for this
@@ -221,7 +221,7 @@ commands:

```shell
pip install -r requirements-argilla.txt # special requirements
- python run.py --embeddings
+ python run.py embeddings
```

*Credit to Phil Schmid for his [tutorial on embeddings finetuning with Matryoshka
1 change: 1 addition & 0 deletions llm-complete-guide/ZENML_VERSION.txt
@@ -0,0 +1 @@
0.71.0
---
@@ -3,7 +3,7 @@
# environment configuration
settings:
docker:
- parent_image: "zenmldocker/prepare-release:base-0.68.0"
+ python_package_installer: "uv"
requirements:
- langchain-community
- ratelimit
@@ -27,14 +27,6 @@ settings:
- datasets
- torch
- pygithub
- openai
environment:
ZENML_PROJECT_SECRET_NAME: llm_complete


# configuration of the Model Control Plane
model:
name: finetuned-zenml-docs-embeddings
version: latest
license: Apache 2.0
description: Finetuned LLM on ZenML docs
tags: ["rag", "finetuned"]
ZENML_PROJECT_SECRET_NAME: llm_complete
---
@@ -15,20 +15,17 @@ settings:
- ratelimit
- rerankers
- pygithub
- rerankers[flashrank]
- matplotlib
- elasticsearch

environment:
ZENML_PROJECT_SECRET_NAME: llm_complete
ZENML_ENABLE_RICH_TRACEBACK: FALSE
ZENML_LOGGING_VERBOSITY: INFO


# configuration of the Model Control Plane
model:
name: finetuned-zenml-docs-embeddings
license: Apache 2.0
description: Finetuned LLM on ZenML docs
tags: ["rag", "finetuned"]

python_package_installer: "uv"
steps:
url_scraper:
parameters:
docs_url: https://docs.zenml.io/stack-components/orchestrators
docs_url: https://docs.zenml.io/
use_dev_set: true
---
@@ -13,10 +13,5 @@ settings:
- psycopg2-binary
- tiktoken
- pygithub

# configuration of the Model Control Plane
model:
name: finetuned-zenml-docs-embeddings
license: Apache 2.0
description: Finetuned LLM on ZenML docs
tags: ["rag", "finetuned"]
- elasticsearch
python_package_installer: "uv"
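Several configs in this commit trade per-file Model Control Plane metadata for a `python_package_installer: "uv"` entry, which switches ZenML's Docker image builds from pip to uv for installing requirements. The post-change shape of these files looks roughly like this (requirements list abbreviated; treat this as a sketch, not the exact file contents):

```yaml
# Sketch of the settings block after this commit; the
# python_package_installer key makes Docker builds use uv.
settings:
  docker:
    requirements:
      - pgvector
      - psycopg2-binary
      - tiktoken
      - elasticsearch
    python_package_installer: "uv"
```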
---
@@ -25,14 +25,7 @@ settings:
- torch
- distilabel
- pygithub
- openai
environment:
ZENML_PROJECT_SECRET_NAME: llm_complete


# configuration of the Model Control Plane
model:
name: finetuned-zenml-docs-embeddings
version: latest
license: Apache 2.0
description: Finetuned LLM on ZenML docs
tags: ["rag", "finetuned"]
python_package_installer: "uv"
37 changes: 37 additions & 0 deletions llm-complete-guide/configs/production/embeddings.yaml
@@ -0,0 +1,37 @@
# enable_cache: False

# environment configuration
settings:
docker:
requirements:
- ratelimit
- pgvector
- psycopg2-binary
- beautifulsoup4
- unstructured
- pandas
- numpy
- sentence-transformers>=3
- transformers[torch]==4.43.1
- litellm
- ollama
- tiktoken
- umap-learn
- matplotlib
- pyarrow
- rerankers[flashrank]
- datasets
- torch
- pygithub
- openai
environment:
ZENML_PROJECT_SECRET_NAME: llm_complete
python_package_installer: "uv"

steps:
finetune:
step_operator: "gcp_a100"
settings:
step_operator.vertex:
accelerator_count: 1
accelerator_type: NVIDIA_TESLA_A100
25 changes: 25 additions & 0 deletions llm-complete-guide/configs/production/eval.yaml
@@ -0,0 +1,25 @@
enable_cache: False

# environment configuration
settings:
docker:
requirements:
- unstructured
- sentence-transformers>=3
- pgvector
- datasets
- litellm
- numpy
- psycopg2-binary
- tiktoken
- ratelimit
- rerankers[flashrank]
- matplotlib
- pillow
- pygithub
- elasticsearch
environment:
ZENML_PROJECT_SECRET_NAME: llm_complete
ZENML_ENABLE_RICH_TRACEBACK: FALSE
ZENML_LOGGING_VERBOSITY: INFO
python_package_installer: "uv"
---
@@ -1,3 +1,5 @@
enable_cache: False

# environment configuration
settings:
docker:
@@ -13,29 +15,23 @@ settings:
- ratelimit
- rerankers
- pygithub
- rerankers[flashrank]
- matplotlib
- elasticsearch

environment:
ZENML_PROJECT_SECRET_NAME: llm_complete
ZENML_ENABLE_RICH_TRACEBACK: FALSE
ZENML_LOGGING_VERBOSITY: INFO

python_package_installer: "uv"
steps:
url_scraper:
parameters:
docs_url: https://docs.zenml.io
repo_url: https://github.com/zenml-io/zenml
website_url: https://zenml.io

use_dev_set: false
# generate_embeddings:
# step_operator: "terraform-gcp-6c0fd52233ca"
# step_operator: "sagemaker"
# settings:
# step_operator.vertex:
# accelerator_type: "NVIDIA_TESLA_P100"
# step_operator.sagemaker:
# accelerator_count: 1
# machine_type: "n1-standard-8"

# configuration of the Model Control Plane
model:
name: finetuned-zenml-docs-embeddings
license: Apache 2.0
description: Finetuned LLM on ZenML docs
tags: ["rag", "finetuned"]
# accelerator_type: NVIDIA_TESLA_A100
29 changes: 29 additions & 0 deletions llm-complete-guide/configs/production/synthetic.yaml
@@ -0,0 +1,29 @@
# environment configuration
settings:
docker:
requirements:
- ratelimit
- pgvector
- psycopg2-binary
- beautifulsoup4
- unstructured
- pandas
- numpy
- sentence-transformers>=3
- transformers==4.43.1
- litellm
- ollama
- tiktoken
- umap-learn
- matplotlib
- pyarrow
- rerankers[flashrank]
- datasets
- torch
- distilabel
- argilla
- pygithub
- openai
environment:
ZENML_PROJECT_SECRET_NAME: llm_complete
python_package_installer: "uv"