Skip to content

Commit

Permalink
bigmodel pipeline update cp38 to cp310 (microsoft#22793)
Browse files Browse the repository at this point in the history
### Description
<!-- Describe your changes. -->
When updating from cp38 to cp310, there were some issues with the bigmodel
pipeline. Two jobs failed: stable_diffusion and whisper.

1. For stable_diffusion, we currently use
"nvcr.io/nvidia/pytorch:22.11-py3" from the NVIDIA repo. It targets CUDA 11
and Python 3.8, and NVIDIA does not provide a Python 3.10 version for CUDA
11; the latest version of this Docker image targets CUDA 12 and
Python 3.10. To solve this problem, I use an Ubuntu 22.04 Docker image
and then install all the Python packages needed for this job.
2. For whisper, the original Docker image is Ubuntu 20.04, which doesn't
have Python 3.10, so it has to be updated to Ubuntu 22.04.
  • Loading branch information
kailums authored and ankitm3k committed Dec 11, 2024
1 parent d55cfde commit beee163
Show file tree
Hide file tree
Showing 6 changed files with 89 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ packaging
protobuf==3.20.3
psutil
sympy
nvtx==0.2.5
torchvision==0.15.2
tensorrt==8.5.1.7
mediapipe
controlnet_aux==0.0.9
# The following are for SDXL
optimum==1.20.0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ git+https://github.com/openai/CLIP.git
open_clip_torch
sentence_transformers
pillow
numpy==1.22.2
27 changes: 18 additions & 9 deletions tools/ci_build/github/azure-pipelines/bigmodels-ci-pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ stages:
set -ex; \
env; \
ccache -s; \
/opt/python/cp38-cp38/bin/python3 /onnxruntime_src/tools/ci_build/build.py \
/opt/python/cp310-cp310/bin/python3 /onnxruntime_src/tools/ci_build/build.py \
--build_dir /build --cmake_generator Ninja \
--config Release --update --build \
--skip_submodule_sync \
Expand Down Expand Up @@ -180,6 +180,17 @@ stages:
TargetPath: '$(Build.BinariesDirectory)/Release'
SpecificArtifact: ${{ parameters.specificArtifact }}
BuildId: ${{ parameters.BuildId }}
- template: templates/get-docker-image-steps.yml
parameters:
Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.package_ubuntu_2204_gpu_opencv
Context: tools/ci_build/github/linux/docker/
ScriptName: tools/ci_build/get_docker_image.py
DockerBuildArgs: "
--build-arg BUILD_UID=$( id -u )
"
Repository: onnxruntimeubuntupackagestest_cuda11
UseImageCacheContainerRegistry: false
UpdateDepsTxt: false

- task: Cache@2
inputs:
Expand All @@ -196,18 +207,15 @@ stages:
-v $(Build.BinariesDirectory)/Release:/Release \
-v $(STABLE_DIFFUSION_MODEL_CACHE):/model_cache:rw \
-v $(GenerateImage_DIR):/images:rw \
nvcr.io/nvidia/pytorch:22.11-py3 \
onnxruntimeubuntupackagestest_cuda11 \
bash -c ' \
set -ex; \
pip uninstall -y $(pip list --format=freeze | grep opencv); \
rm -rf /usr/local/lib/python3.8/dist-packages/cv2/; \
apt-get update; \
DEBIAN_FRONTEND="noninteractive" apt-get install --yes python3-opencv; \
python3 --version; \
python3 -m pip install --upgrade pip; \
python3 -m pip install /Release/*.whl; \
pushd /workspace/onnxruntime/python/tools/transformers/models/stable_diffusion; \
python3 -m pip install -r requirements/cuda11/requirements.txt; \
python3 -m pip install numpy==1.22.2; \
python3 -m pip install --upgrade polygraphy onnx-graphsurgeon ; \
echo Generate an image guided by a text prompt; \
python3 demo_txt2img.py --framework-model-dir /model_cache --seed 1 --deterministic "astronaut riding a horse on mars" ; \
Expand Down Expand Up @@ -238,7 +246,7 @@ stages:
- script: |
docker run --rm --gpus all -v $PWD:/workspace \
-v $(CLIP_MODEL_CACHE):/model_cache:rw \
nvcr.io/nvidia/pytorch:22.11-py3 \
onnxruntimeubuntupackagestest_cuda11 \
bash -c '
set -x; \
python3 --version; \
Expand All @@ -265,14 +273,15 @@ stages:
- script: |
docker run --rm --gpus all -v $PWD:/workspace \
-v $(CLIP_MODEL_CACHE):/model_cache:rw \
nvcr.io/nvidia/pytorch:22.11-py3 \
onnxruntimeubuntupackagestest_cuda11 \
bash -c '
set -ex; \
python3 --version; \
python3 -m pip install --upgrade pip; \
pushd /workspace/onnxruntime/python/tools/transformers/models/stable_diffusion/; \
image2=$(find $(pwd) -name "astronaut_riding_a_h*.png") ; \
pushd test; \
python3 -m pip install numpy==1.22.2; \
python3 -m pip install -r requirements.txt; \
echo check demo_txt2image.py generate image; \
python3 -u check_image.py --image1 astronaut_riding_txt2image-DDIM-50.png --image2 $image2 --cache_dir /model_cache ; \
Expand Down Expand Up @@ -438,7 +447,7 @@ stages:
- template: templates/get-docker-image-steps.yml
parameters:
Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.package_ubuntu_2004_gpu_ffmpeg
Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.package_ubuntu_2204_gpu_ffmpeg
Context: tools/ci_build/github/linux/docker/
ScriptName: tools/ci_build/get_docker_image.py
DockerBuildArgs: '--build-arg BUILD_UID=$( id -u )'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ ARG BASEIMAGE=nvidia/cuda:11.8.0-cudnn8-devel-ubi8
ARG TRT_VERSION=10.6.0.26-1.cuda11.8
FROM $BASEIMAGE AS base
ARG TRT_VERSION
ENV PATH=/opt/python/cp38-cp38/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/src/tensorrt/bin:${PATH}
ENV PATH=/opt/python/cp310-cp310/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/src/tensorrt/bin:${PATH}

RUN dnf install -y bash wget &&\
dnf clean dbcache
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# Dockerfile to run ONNXRuntime with TensorRT integration

# Build base image with required system packages
ARG BASEIMAGE=nvidia/cuda:11.8.0-cudnn8-devel-ubuntu20.04
ARG BASEIMAGE=nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04
ARG TRT_VERSION=10.6.0.26-1+cuda11.8
ARG LD_LIBRARY_PATH_ARG=/usr/local/lib64:/usr/local/cuda/lib64
FROM $BASEIMAGE AS base
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# --------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------
# Dockerfile to run ONNXRuntime with TensorRT integration.
#
# Layout:
#   base  - CUDA base image plus git/wget, python3-opencv, python3/pip,
#           the TensorRT packages and whatever scripts/install_dotnet.sh sets up.
#   final - base plus an unprivileged build user (BUILD_USER / BUILD_UID).
#
# Build args:
#   BASEIMAGE            base image to build from
#   TRT_VERSION          apt version string pinned for every TensorRT package
#   LD_LIBRARY_PATH_ARG  directories prepended to LD_LIBRARY_PATH
#   BUILD_USER/BUILD_UID name and uid of the user created in the final stage

# Build base image with required system packages
ARG BASEIMAGE=nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04
ARG TRT_VERSION=10.6.0.26-1+cuda11.8
ARG LD_LIBRARY_PATH_ARG=/usr/local/lib64:/usr/local/cuda/lib64
FROM $BASEIMAGE AS base
# ARGs declared before the first FROM are outside every build stage; they must
# be redeclared (without a value) inside the stage to pick up their defaults.
ARG TRT_VERSION
ARG LD_LIBRARY_PATH_ARG
ENV PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/src/tensorrt/bin:${PATH}
ENV DEBIAN_FRONTEND=noninteractive

# Without the ARG redeclaration above, ${LD_LIBRARY_PATH_ARG} expands to an
# empty string and LD_LIBRARY_PATH would start with an empty entry.
ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH_ARG}:${LD_LIBRARY_PATH}

# Install system tools, OpenCV and python3.
# "apt-get update" stays in the same RUN as the installs so a cached update
# layer can never be combined with newer install commands (stale package
# lists). DEBIAN_FRONTEND is already set by the ENV above.
RUN apt-get update &&\
    apt-get install -y git bash wget diffutils &&\
    apt-get install --yes python3-opencv &&\
    apt-get install -y --no-install-recommends \
        python3 \
        python3-pip \
        python3-dev \
        python3-wheel

RUN pip install --upgrade pip

# Install TensorRT
# NOTE(review): the signing key is fetched from the ubuntu2004 repository path
# although the base image is ubuntu22.04 - confirm whether this fetch is still
# required and whether it should point at the ubuntu2204 repository instead.
# If the CUDA major version is 12 or higher, cudnn 9 for CUDA 12 is installed
# as well. CUDA_VERSION is not set in this file; presumably it is provided by
# the base image.
RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/7fa2af80.pub &&\
    apt-get update &&\
    apt-get install -y \
        libnvinfer-dev=${TRT_VERSION} \
        libnvinfer-dispatch-dev=${TRT_VERSION} \
        libnvinfer-dispatch10=${TRT_VERSION} \
        libnvinfer-headers-dev=${TRT_VERSION} \
        libnvinfer-headers-plugin-dev=${TRT_VERSION} \
        libnvinfer-lean-dev=${TRT_VERSION} \
        libnvinfer-lean10=${TRT_VERSION} \
        libnvinfer-plugin-dev=${TRT_VERSION} \
        libnvinfer-plugin10=${TRT_VERSION} \
        libnvinfer-vc-plugin-dev=${TRT_VERSION} \
        libnvinfer-vc-plugin10=${TRT_VERSION} \
        libnvinfer10=${TRT_VERSION} \
        libnvonnxparsers-dev=${TRT_VERSION} \
        libnvonnxparsers10=${TRT_VERSION} \
        tensorrt-dev=${TRT_VERSION} \
        libnvinfer-bin=${TRT_VERSION} &&\
    if [ $(echo $CUDA_VERSION | cut -d"." -f1) -ge 12 ]; then apt-get install -y cudnn9-cuda-12 ; fi

# COPY (rather than ADD) because this is a plain local directory; the scripts
# are removed again in the same layer that runs them.
COPY scripts /tmp/scripts
RUN cd /tmp/scripts && /tmp/scripts/install_dotnet.sh && rm -rf /tmp/scripts

# Build final image from base.
FROM base AS final
ARG BUILD_USER=onnxruntimedev
ARG BUILD_UID=1000
# Create the unprivileged user; the pipeline passes BUILD_UID=$( id -u ).
RUN adduser --uid $BUILD_UID $BUILD_USER
WORKDIR /home/$BUILD_USER
USER $BUILD_USER

0 comments on commit beee163

Please sign in to comment.