From 71e7c2b4235a5132e4b9d00a01b18abe3aab71cd Mon Sep 17 00:00:00 2001 From: Edward Chen <18449977+edgchen1@users.noreply.github.com> Date: Tue, 17 Nov 2020 17:02:24 -0800 Subject: [PATCH] Cache build docker images in container registry. (#5811) This PR adds infrastructure to automatically cache docker images used in CI builds in a container registry. Currently, build images are pulled from a container registry for some builds and built every time for others. The container registry requires maintenance to keep the images up to date, and building images every time wastes build agent resources. With this change, a given build image is looked up in a cache container registry and, if present, pulled; otherwise, it is built and pushed. The uniqueness of a build image is determined by a hash digest of the dockerfile, the docker build context directory, and certain "docker build" options. This digest is part of the image tag in the cache container repository. The cache container registry will need to be cleaned up periodically. This is not automated yet. 
--- ...blob.py => download_azure_blob_archive.py} | 22 ++- tools/ci_build/build.py | 35 ++-- tools/ci_build/exclude_unused_ops.py | 5 +- tools/ci_build/get_docker_image.py | 166 ++++++++++++++++++ .../linux-gpu-tensorrt-ci-pipeline.yml | 8 +- .../linux-multi-gpu-tensorrt-ci-pipeline.yml | 2 +- .../linux-ngraph-ci-pipeline.yml | 5 +- .../linux-nuphar-ci-pipeline.yml | 2 +- .../linux-openvino-ci-pipeline.yml | 2 +- .../linux-openvino-nightly-pipeline.yml | 7 +- .../orttraining-linux-gpu-ci-pipeline.yml | 3 +- ...raining-linux-gpu-e2e-test-ci-pipeline.yml | 14 +- ...ng-linux-gpu-e2e-test-nightly-pipeline.yml | 38 ++-- ...ng-linux-gpu-frontend-test-ci-pipeline.yml | 25 +-- ...aining-linux-gpu-perf-test-ci-pipeline.yml | 24 +-- .../azure-pipelines/templates/linux-ci.yml | 7 +- .../templates/run-docker-build-steps.yml | 17 ++ .../with-build-docker-image-cache-steps.yml | 25 +++ tools/ci_build/github/download_test_data.py | 2 +- .../ci_build/github/linux/run_dockerbuild.sh | 60 +++++-- tools/ci_build/logger.py | 11 +- tools/ci_build/op_registration_utils.py | 4 +- tools/ci_build/op_registration_validator.py | 4 +- tools/python/util/__init__.py | 5 + tools/python/{ => util}/get_azcopy.py | 8 +- tools/python/util/run.py | 43 +++++ 26 files changed, 429 insertions(+), 115 deletions(-) rename orttraining/tools/ci_test/{download_azure_blob.py => download_azure_blob_archive.py} (69%) create mode 100755 tools/ci_build/get_docker_image.py create mode 100644 tools/ci_build/github/azure-pipelines/templates/run-docker-build-steps.yml create mode 100644 tools/ci_build/github/azure-pipelines/templates/with-build-docker-image-cache-steps.yml create mode 100644 tools/python/util/__init__.py rename tools/python/{ => util}/get_azcopy.py (91%) create mode 100644 tools/python/util/run.py diff --git a/orttraining/tools/ci_test/download_azure_blob.py b/orttraining/tools/ci_test/download_azure_blob_archive.py similarity index 69% rename from orttraining/tools/ci_test/download_azure_blob.py 
rename to orttraining/tools/ci_test/download_azure_blob_archive.py index 324f97b4a865a..564dcc8007ce4 100755 --- a/orttraining/tools/ci_test/download_azure_blob.py +++ b/orttraining/tools/ci_test/download_azure_blob_archive.py @@ -17,7 +17,7 @@ sys.path.append(os.path.join(REPO_DIR, "tools", "python")) -import get_azcopy # noqa: E402 +from util import get_azcopy # noqa: E402 def _download(azcopy_path, url, local_path): subprocess.run([azcopy_path, "cp", "--log-level", "NONE", url, local_path], check=True) @@ -39,19 +39,23 @@ def _check_file_sha256_digest(path, expected_digest): match = actual_digest.lower() == expected_digest.lower() if not match: raise RuntimeError( - "SHA256 digest mismatch, expected: {}, actual: {}".format(expected_digest.lower(), actual_digest.lower())) + "SHA256 digest mismatch, expected: {}, actual: {}".format( + expected_digest.lower(), actual_digest.lower())) def main(): - parser = argparse.ArgumentParser(description="Downloads training end-to-end test data.") - parser.add_argument("--azure_blob_url", required=True, help="The test data destination directory.") - parser.add_argument("--target_dir", required=True, help="The test data destination directory.") - parser.add_argument("--archive_sha256_digest", help="The test data destination directory.") + parser = argparse.ArgumentParser( + description="Downloads an Azure blob archive.") + parser.add_argument("--azure_blob_url", required=True, + help="The Azure blob URL.") + parser.add_argument("--target_dir", required=True, + help="The destination directory.") + parser.add_argument("--archive_sha256_digest", + help="The SHA256 digest of the archive. 
Verified if provided.") args = parser.parse_args() - with tempfile.TemporaryDirectory() as temp_dir, \ - get_azcopy.get_azcopy() as azcopy_path: + with tempfile.TemporaryDirectory() as temp_dir, get_azcopy() as azcopy_path: archive_path = os.path.join(temp_dir, "archive.zip") - print("Downloading E2E test data from '{}'...".format(args.azure_blob_url)) + print("Downloading archive from '{}'...".format(args.azure_blob_url)) _download(azcopy_path, args.azure_blob_url, archive_path) if args.archive_sha256_digest: _check_file_sha256_digest(archive_path, args.archive_sha256_digest) diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index 683663db00e38..b89feca88cd14 100755 --- a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -10,10 +10,22 @@ import subprocess import sys import hashlib -from logger import log +from logger import get_logger from amd_hipify import amd_hipify +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) +REPO_DIR = os.path.normpath(os.path.join(SCRIPT_DIR, "..", "..")) + +sys.path.append(os.path.join(REPO_DIR, "tools", "python")) + + +from util import run # noqa: E402 + + +log = get_logger("build") + + class BaseError(Exception): """Base class for errors originating from build.py.""" pass @@ -490,8 +502,9 @@ def get_config_build_dir(build_dir, config): def run_subprocess(args, cwd=None, capture=False, dll_path=None, shell=False, env={}): - log.info("Running subprocess in '{0}'\n{1}".format( - cwd or os.getcwd(), args)) + if isinstance(args, str): + raise ValueError("args should be a sequence of strings, not a string") + my_env = os.environ.copy() if dll_path: if is_windows(): @@ -502,15 +515,9 @@ def run_subprocess(args, cwd=None, capture=False, dll_path=None, else: my_env["LD_LIBRARY_PATH"] = dll_path - stdout, stderr = (subprocess.PIPE, subprocess.STDOUT) if capture else ( - None, None) my_env.update(env) - completed_process = subprocess.run( - args, cwd=cwd, check=True, stdout=stdout, stderr=stderr, - env=my_env, 
shell=shell) - log.debug("Subprocess completed. Return code=" + - str(completed_process.returncode)) - return completed_process + + return run(*args, cwd=cwd, capture=capture, shell=shell, env=my_env) def update_submodules(source_dir): @@ -925,7 +932,7 @@ def generate_build_tree(cmake_path, source_dir, build_dir, cuda_home, cudnn_home args.cmake_generator == 'Visual Studio 16 2019' and args.use_full_protobuf): raise BuildError( - "Fuzz test has only be tested with build shared libs option using MSVC on windows") + "Fuzz test has only be tested with build shared libs option using MSVC on windows") cmake_args += [ "-Donnxruntime_BUILD_UNIT_TESTS=ON", "-Donnxruntime_FUZZ_TEST=ON", @@ -1163,9 +1170,9 @@ def adb_shell(*args, **kwargs): def run_android_tests(args, source_dir, config, cwd): if args.android_abi == 'x86_64': - run_subprocess(os.path.join( + run_subprocess([os.path.join( source_dir, 'tools', 'ci_build', 'github', 'android', - 'start_android_emulator.sh')) + 'start_android_emulator.sh')]) adb_push('testdata', '/data/local/tmp/', cwd=cwd) adb_push( os.path.join(source_dir, 'cmake', 'external', 'onnx', 'onnx', 'backend', 'test'), diff --git a/tools/ci_build/exclude_unused_ops.py b/tools/ci_build/exclude_unused_ops.py index 521622dced4ff..b61a87035b848 100644 --- a/tools/ci_build/exclude_unused_ops.py +++ b/tools/ci_build/exclude_unused_ops.py @@ -14,7 +14,10 @@ import typing from onnx import AttributeProto -from logger import log +from logger import get_logger + + +log = get_logger("exclude_unused_ops") def _extract_ops_from_config(file_path, required_ops): diff --git a/tools/ci_build/get_docker_image.py b/tools/ci_build/get_docker_image.py new file mode 100755 index 0000000000000..923881dfd77ad --- /dev/null +++ b/tools/ci_build/get_docker_image.py @@ -0,0 +1,166 @@ +#!/usr/bin/env python3 +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+ +import argparse +import collections +import hashlib +import os +import shlex +import sys +from logger import get_logger + + +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) +REPO_DIR = os.path.normpath(os.path.join(SCRIPT_DIR, "..", "..")) + +sys.path.append(os.path.join(REPO_DIR, "tools", "python")) + + +from util import run # noqa: E402 + + +log = get_logger("get_docker_image") + + +def parse_args(): + parser = argparse.ArgumentParser( + description="Gets a docker image, either by pulling it from a " + "container registry or building it locally and then pushing it. " + "The uniqueness of the docker image is determined by a hash digest of " + "the Dockerfile, the build context directory, and arguments to " + "'docker build' affecting the image content. " + "This digest value is used in the image tag. " + "This script checks whether an image with that tag is initially " + "present in the container registry to determine whether to pull or " + "build the image. " + "The user must be logged in to the container registry.") + + parser.add_argument( + "--dockerfile", default="Dockerfile", help="Path to the Dockerfile.") + parser.add_argument( + "--context", default=".", help="Path to the build context.") + parser.add_argument( + "--docker-build-args", default="", + help="String of Docker build args which may affect the image content. " + "These will be used in differentiating images from one another. 
" + "For example, '--build-arg'.") + parser.add_argument( + "--docker-build-args-not-affecting-image-content", default="", + help="String of Docker build args which do not affect the image " + "content.") + + parser.add_argument( + "--container-registry", required=True, + help="The Azure container registry name.") + parser.add_argument( + "--repository", required=True, help="The image repository name.") + + parser.add_argument( + "--docker-path", default="docker", help="Path to docker.") + + return parser.parse_args() + + +FileInfo = collections.namedtuple('FileInfo', ['path', 'mode']) + + +def file_info_str(file_info: FileInfo): + return "{} {}".format(file_info.path, file_info.mode) + + +def make_file_info_from_path(file_path: str): + return FileInfo(file_path, os.stat(file_path).st_mode) + + +def update_hash_with_directory(dir_file_info: FileInfo, hash_obj): + hash_obj.update(file_info_str(dir_file_info).encode()) + + files, dirs = [], [] + with os.scandir(dir_file_info.path) as dir_it: + for dir_entry in dir_it: + file_info = FileInfo(dir_entry.path, dir_entry.stat().st_mode) + if dir_entry.is_dir(): + dirs.append(file_info) + elif dir_entry.is_file(): + files.append(file_info) + + def file_info_key(file_info: FileInfo): + return file_info.path + + files.sort(key=file_info_key) + dirs.sort(key=file_info_key) + + for file_info in files: + update_hash_with_file(file_info, hash_obj) + + for file_info in dirs: + update_hash_with_directory(file_info, hash_obj) + + +def update_hash_with_file(file_info: FileInfo, hash_obj): + hash_obj.update(file_info_str(file_info).encode()) + + read_bytes_length = 8192 + with open(file_info.path, mode="rb") as file_data: + while True: + read_bytes = file_data.read(read_bytes_length) + if len(read_bytes) == 0: + break + hash_obj.update(read_bytes) + + +def generate_tag(dockerfile_path, context_path, docker_build_args_str): + hash_obj = hashlib.sha256() + hash_obj.update(docker_build_args_str.encode()) + update_hash_with_file( + 
make_file_info_from_path(dockerfile_path), hash_obj) + update_hash_with_directory( + make_file_info_from_path(context_path), hash_obj) + return "image_content_digest_{}".format(hash_obj.hexdigest()) + + +def container_registry_has_image(full_image_name, docker_path): + env = os.environ.copy() + env["DOCKER_CLI_EXPERIMENTAL"] = "enabled" # needed for "docker manifest" + proc = run( + docker_path, "manifest", "inspect", "--insecure", full_image_name, + env=env, check=False, quiet=True) + return proc.returncode == 0 + + +def main(): + args = parse_args() + + tag = generate_tag(args.dockerfile, args.context, args.docker_build_args) + + full_image_name = "{}.azurecr.io/{}:{}".format( + args.container_registry, args.repository, tag) + + log.info("Image: {}".format(full_image_name)) + + if container_registry_has_image(full_image_name, args.docker_path): + log.info("Image found, pulling...") + + run(args.docker_path, "pull", full_image_name) + else: + log.info("Image not found, building and pushing...") + + run(args.docker_path, "build", + "--pull", + *shlex.split(args.docker_build_args), + *shlex.split(args.docker_build_args_not_affecting_image_content), + "--tag", full_image_name, + "--file", args.dockerfile, + args.context) + + run(args.docker_path, "push", full_image_name) + + # tag so we can refer to the image by repository name + run(args.docker_path, "tag", full_image_name, args.repository) + + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-ci-pipeline.yml index 4310bc38e6c08..8f2c363674719 100644 --- a/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-ci-pipeline.yml @@ -7,10 +7,10 @@ jobs: steps: - template: templates/set-test-data-variables-step.yml - # Latest TensorRT container only supports ubuntu18.04 - - script: 
'tools/ci_build/github/linux/run_dockerbuild.sh -o ubuntu18.04 -d tensorrt -r $(Build.BinariesDirectory) -p 3.6 -x "--build_wheel"' - - displayName: 'Command Line Script' + - template: templates/run-docker-build-steps.yml + parameters: + # Latest TensorRT container only supports ubuntu18.04 + RunDockerBuildArgs: '-o ubuntu18.04 -d tensorrt -r $(Build.BinariesDirectory) -p 3.6 -x "--build_wheel"' - template: templates/component-governance-component-detection-steps.yml parameters : diff --git a/tools/ci_build/github/azure-pipelines/linux-multi-gpu-tensorrt-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-multi-gpu-tensorrt-ci-pipeline.yml index 5ed98d8c46a67..e51234a262956 100644 --- a/tools/ci_build/github/azure-pipelines/linux-multi-gpu-tensorrt-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/linux-multi-gpu-tensorrt-ci-pipeline.yml @@ -4,6 +4,6 @@ jobs: AgentPool : 'Linux-Multi-GPU' JobName: 'Linux_CI_Multi_GPU_TensorRT_Dev' # The latest TensorRT container only supports ubuntu18.04 - BuildCommand: 'tools/ci_build/github/linux/run_dockerbuild.sh -o ubuntu18.04 -d tensorrt -r $(Build.BinariesDirectory) -p 3.6 -x "--enable_multi_device_test"' + RunDockerBuildArgs: '-o ubuntu18.04 -d tensorrt -r $(Build.BinariesDirectory) -p 3.6 -x "--enable_multi_device_test"' DoNugetPack: 'false' ArtifactName: 'drop-linux' diff --git a/tools/ci_build/github/azure-pipelines/linux-ngraph-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-ngraph-ci-pipeline.yml index a60bc4fe6a907..0a31faa80429f 100644 --- a/tools/ci_build/github/azure-pipelines/linux-ngraph-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/linux-ngraph-ci-pipeline.yml @@ -15,8 +15,9 @@ jobs: continueOnError: true condition: always() - - script: 'tools/ci_build/github/linux/run_dockerbuild.sh -o ubuntu16.04 -d ngraph -r $(Build.BinariesDirectory) -x "--use_ngraph --build_wheel"' - displayName: 'Command Line Script' + - template: templates/run-docker-build-steps.yml + 
parameters: + RunDockerBuildArgs: '-o ubuntu16.04 -d ngraph -r $(Build.BinariesDirectory) -x "--use_ngraph --build_wheel"' - template: templates/component-governance-component-detection-steps.yml parameters : diff --git a/tools/ci_build/github/azure-pipelines/linux-nuphar-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-nuphar-ci-pipeline.yml index cafae402d5345..3a82967f3534c 100644 --- a/tools/ci_build/github/azure-pipelines/linux-nuphar-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/linux-nuphar-ci-pipeline.yml @@ -3,7 +3,7 @@ jobs: parameters: AgentPool : 'Linux-CPU' JobName: 'Linux_CI_Dev' - BuildCommand: 'tools/ci_build/github/linux/run_dockerbuild.sh -o ubuntu18.04 -d cpu -r $(Build.BinariesDirectory) -x "--enable_pybind --use_nuphar"' + RunDockerBuildArgs: '-o ubuntu18.04 -d cpu -r $(Build.BinariesDirectory) -x "--enable_pybind --use_nuphar"' DoNugetPack: 'false' ArtifactName: 'drop-linux' TimeoutInMinutes: 180 diff --git a/tools/ci_build/github/azure-pipelines/linux-openvino-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-openvino-ci-pipeline.yml index 51d9877ac1c94..844fc8b1ed6ca 100644 --- a/tools/ci_build/github/azure-pipelines/linux-openvino-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/linux-openvino-ci-pipeline.yml @@ -3,7 +3,7 @@ jobs: parameters: AgentPool : 'Linux-CPU' JobName: 'Linux_CI_Dev' - BuildCommand: 'tools/ci_build/github/linux/run_dockerbuild.sh -o ubuntu18.04 -d openvino -v 2021.1 -r $(Build.BinariesDirectory) -x "--use_openvino CPU_FP32 --build_wheel"' + RunDockerBuildArgs: '-o ubuntu18.04 -d openvino -v 2021.1 -r $(Build.BinariesDirectory) -x "--use_openvino CPU_FP32 --build_wheel"' DoNugetPack: 'false' ArtifactName: 'drop-linux' TimeoutInMinutes: 120 diff --git a/tools/ci_build/github/azure-pipelines/linux-openvino-nightly-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-openvino-nightly-pipeline.yml index a1d128ab56d26..62a3dec8a6d1c 100644 --- 
a/tools/ci_build/github/azure-pipelines/linux-openvino-nightly-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/linux-openvino-nightly-pipeline.yml @@ -20,9 +20,10 @@ jobs: arguments: --build_dir $(Build.BinariesDirectory) --edge_device pythonInterpreter: '/usr/bin/python3' workingDirectory: $(Build.BinariesDirectory) - - - script: 'tools/ci_build/github/linux/run_dockerbuild.sh -o ubuntu16.04 -d openvino -v 2020.2 -r $(Build.BinariesDirectory) -x "--use_openvino GPU_FP32 --build_wheel"' - displayName: 'Command Line Script' + + - template: templates/run-docker-build-steps.yml + parameters: + RunDockerBuildArgs: '-o ubuntu16.04 -d openvino -v 2020.2 -r $(Build.BinariesDirectory) -x "--use_openvino GPU_FP32 --build_wheel"' - template: templates/component-governance-component-detection-steps.yml parameters : diff --git a/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-ci-pipeline.yml index 85b58d01b1ad3..03fdff524c163 100644 --- a/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-ci-pipeline.yml @@ -6,8 +6,7 @@ jobs: AgentPool : 'Linux-Single-GPU-V100' JobName: 'Onnxruntime_Linux_GPU_Training' SubmoduleCheckoutMode: 'recursive' - BuildCommand: > - tools/ci_build/github/linux/run_dockerbuild.sh + RunDockerBuildArgs: > -o ubuntu16.04 -d gpu -r $(Build.BinariesDirectory) -x " --enable_training diff --git a/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-e2e-test-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-e2e-test-ci-pipeline.yml index c331def4cb015..cd7ddc221a002 100644 --- a/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-e2e-test-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-e2e-test-ci-pipeline.yml @@ -12,21 +12,23 @@ jobs: # update these if the E2E test data changes - script: | - 
orttraining/tools/ci_test/download_azure_blob.py \ + orttraining/tools/ci_test/download_azure_blob_archive.py \ --azure_blob_url https://onnxruntimetestdata.blob.core.windows.net/training/onnxruntime_training_data.zip?snapshot=2020-06-15T23:17:35.8314853Z \ --target_dir $(Build.BinariesDirectory)/training_e2e_test_data \ --archive_sha256_digest B01C169B6550D1A0A6F1B4E2F34AE2A8714B52DBB70AC04DA85D371F691BDFF9 displayName: 'Download onnxruntime_training_data.zip data' - - script: | - tools/ci_build/github/linux/run_dockerbuild.sh \ + - template: templates/run-docker-build-steps.yml + parameters: + RunDockerBuildArgs: | -o ubuntu16.04 -d gpu -r $(Build.BinariesDirectory) \ + -t onnxruntime_e2e_test_image \ -x " \ --config RelWithDebInfo \ --enable_training \ --update --build \ " - displayName: 'Build' + DisplayName: 'Build' - script: | docker run \ @@ -35,7 +37,7 @@ jobs: --volume $(Build.SourcesDirectory):/onnxruntime_src \ --volume $(Build.BinariesDirectory):/build \ --volume $(Build.BinariesDirectory)/training_e2e_test_data:/training_e2e_test_data:ro \ - onnxruntime-ubuntu16.04-cuda10.1-cudnn7.6 \ + onnxruntime_e2e_test_image \ /onnxruntime_src/orttraining/tools/ci_test/run_batch_size_test.py \ --binary_dir /build/RelWithDebInfo \ --model_root /training_e2e_test_data/models @@ -49,7 +51,7 @@ jobs: --volume $(Build.SourcesDirectory):/onnxruntime_src \ --volume $(Build.BinariesDirectory):/build \ --volume $(Build.BinariesDirectory)/training_e2e_test_data:/training_e2e_test_data:ro \ - onnxruntime-ubuntu16.04-cuda10.1-cudnn7.6 \ + onnxruntime_e2e_test_image \ /onnxruntime_src/orttraining/tools/ci_test/run_convergence_test.py \ --binary_dir /build/RelWithDebInfo \ --model_root /training_e2e_test_data/models \ diff --git a/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-e2e-test-nightly-pipeline.yml b/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-e2e-test-nightly-pipeline.yml index f054fe2cb58c2..3a2497f902cc8 100644 --- 
a/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-e2e-test-nightly-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-e2e-test-nightly-pipeline.yml @@ -12,21 +12,23 @@ jobs: # update these if the E2E test data changes - script: | - orttraining/tools/ci_test/download_azure_blob.py \ + orttraining/tools/ci_test/download_azure_blob_archive.py \ --azure_blob_url https://onnxruntimetestdata.blob.core.windows.net/training/onnxruntime_training_data.zip?snapshot=2020-06-15T23:17:35.8314853Z \ --target_dir $(Build.BinariesDirectory)/training_e2e_test_data \ --archive_sha256_digest B01C169B6550D1A0A6F1B4E2F34AE2A8714B52DBB70AC04DA85D371F691BDFF9 displayName: 'Download onnxruntime_training_data.zip data' - script: | - orttraining/tools/ci_test/download_azure_blob.py \ + orttraining/tools/ci_test/download_azure_blob_archive.py \ --azure_blob_url https://onnxruntimetestdata.blob.core.windows.net/training/glue_MRPC_data.zip \ --target_dir /bert_data/hf_data/ displayName: 'Download glue_MRPC_data.zip data' - - script: | - tools/ci_build/github/linux/run_dockerbuild.sh \ + - template: templates/run-docker-build-steps.yml + parameters: + RunDockerBuildArgs: | -o ubuntu16.04 -d gpu -r $(Build.BinariesDirectory) \ + -t onnxruntime_e2e_test_image \ -x " \ --config RelWithDebInfo \ --enable_training \ @@ -35,7 +37,7 @@ jobs: --enable_training_python_frontend_e2e_tests \ --enable_training_pipeline_e2e_tests \ " - displayName: 'Build' + DisplayName: 'Build' # Hit OOM with run_training_pipeline_e2e_tests.py - slightly above 16GB limit. # leave this code here for further investigation. 
@@ -48,7 +50,7 @@ jobs: # --volume $(Build.BinariesDirectory):/build \ # --volume /bert_data:/bert_data \ # --volume /bert_ort:/bert_ort \ - # onnxruntime-ubuntu16.04-cuda10.1-cudnn7.6 \ + # onnxruntime_e2e_test_image \ # /build/RelWithDebInfo/run_training_pipeline_e2e_tests.py \ # --cwd /build/RelWithDebInfo # displayName: 'Run run_training_pipeline_e2e_tests.py' @@ -62,7 +64,7 @@ jobs: --volume $(Build.SourcesDirectory):/onnxruntime_src \ --volume $(Build.BinariesDirectory):/build \ --volume $(Build.BinariesDirectory)/training_e2e_test_data:/training_e2e_test_data:ro \ - onnxruntime-ubuntu16.04-cuda10.1-cudnn7.6 \ + onnxruntime_e2e_test_image \ /onnxruntime_src/orttraining/tools/ci_test/run_batch_size_test.py \ --binary_dir /build/RelWithDebInfo \ --model_root /training_e2e_test_data/models @@ -76,7 +78,7 @@ jobs: --volume $(Build.SourcesDirectory):/onnxruntime_src \ --volume $(Build.BinariesDirectory):/build \ --volume $(Build.BinariesDirectory)/training_e2e_test_data:/training_e2e_test_data:ro \ - onnxruntime-ubuntu16.04-cuda10.1-cudnn7.6 \ + onnxruntime_e2e_test_image \ /onnxruntime_src/orttraining/tools/ci_test/run_convergence_test.py \ --binary_dir /build/RelWithDebInfo \ --model_root /training_e2e_test_data/models \ @@ -93,7 +95,7 @@ jobs: --volume $(Build.SourcesDirectory):/onnxruntime_src \ --volume $(Build.BinariesDirectory):/build \ --volume /bert_data:/bert_data \ - onnxruntime-ubuntu16.04-cuda10.1-cudnn7.6 \ + onnxruntime_e2e_test_image \ /build/RelWithDebInfo/launch_test.py \ --cmd_line_with_args "python orttraining_run_frontend_batch_size_test.py -v" \ --cwd /build/RelWithDebInfo \ @@ -110,7 +112,7 @@ jobs: --volume $(Build.SourcesDirectory):/onnxruntime_src \ --volume $(Build.BinariesDirectory):/build \ --volume /bert_data:/bert_data \ - onnxruntime-ubuntu16.04-cuda10.1-cudnn7.6 \ + onnxruntime_e2e_test_image \ /build/RelWithDebInfo/launch_test.py \ --cmd_line_with_args "mpirun -n 4 -x NCCL_DEBUG=INFO python orttraining_run_glue.py" \ --cwd 
/build/RelWithDebInfo @@ -126,7 +128,7 @@ jobs: --volume $(Build.SourcesDirectory):/onnxruntime_src \ --volume $(Build.BinariesDirectory):/build \ --volume /bert_data:/bert_data \ - onnxruntime-ubuntu16.04-cuda10.1-cudnn7.6 \ + onnxruntime_e2e_test_image \ /build/RelWithDebInfo/launch_test.py \ --cmd_line_with_args "python orttraining_run_glue.py ORTGlueTest.test_bert_with_mrpc -v" \ --cwd /build/RelWithDebInfo \ @@ -143,7 +145,7 @@ jobs: --volume $(Build.SourcesDirectory):/onnxruntime_src \ --volume $(Build.BinariesDirectory):/build \ --volume /bert_data:/bert_data \ - onnxruntime-ubuntu16.04-cuda10.1-cudnn7.6 \ + onnxruntime_e2e_test_image \ /build/RelWithDebInfo/launch_test.py \ --cmd_line_with_args "python orttraining_run_glue.py ORTGlueTest.test_bert_fp16_with_mrpc -v" \ --cwd /build/RelWithDebInfo \ @@ -160,7 +162,7 @@ jobs: --volume $(Build.SourcesDirectory):/onnxruntime_src \ --volume $(Build.BinariesDirectory):/build \ --volume /bert_data:/bert_data \ - onnxruntime-ubuntu16.04-cuda10.1-cudnn7.6 \ + onnxruntime_e2e_test_image \ /build/RelWithDebInfo/launch_test.py \ --cmd_line_with_args "python orttraining_run_glue.py ORTGlueTest.test_roberta_with_mrpc -v" \ --cwd /build/RelWithDebInfo \ @@ -177,7 +179,7 @@ jobs: --volume $(Build.SourcesDirectory):/onnxruntime_src \ --volume $(Build.BinariesDirectory):/build \ --volume /bert_data:/bert_data \ - onnxruntime-ubuntu16.04-cuda10.1-cudnn7.6 \ + onnxruntime_e2e_test_image \ /build/RelWithDebInfo/launch_test.py \ --cmd_line_with_args "python orttraining_run_glue.py ORTGlueTest.test_roberta_fp16_with_mrpc -v" \ --cwd /build/RelWithDebInfo \ @@ -194,7 +196,7 @@ jobs: --volume $(Build.SourcesDirectory):/onnxruntime_src \ --volume $(Build.BinariesDirectory):/build \ --volume /bert_data:/bert_data \ - onnxruntime-ubuntu16.04-cuda10.1-cudnn7.6 \ + onnxruntime_e2e_test_image \ /build/RelWithDebInfo/launch_test.py \ --cmd_line_with_args "python orttraining_run_multiple_choice.py 
ORTMultipleChoiceTest.test_bert_fp16_with_swag -v" \ --cwd /build/RelWithDebInfo \ @@ -211,7 +213,7 @@ jobs: --volume $(Build.SourcesDirectory):/onnxruntime_src \ --volume $(Build.BinariesDirectory):/build \ --volume /bert_data:/bert_data \ - onnxruntime-ubuntu16.04-cuda10.1-cudnn7.6 \ + onnxruntime_e2e_test_image \ /build/RelWithDebInfo/launch_test.py \ --cmd_line_with_args "python onnxruntime_test_ort_trainer_with_mixed_precision.py -v" \ --cwd /build/RelWithDebInfo @@ -226,7 +228,7 @@ jobs: --volume $(Build.SourcesDirectory):/onnxruntime_src \ --volume $(Build.BinariesDirectory):/build \ --volume /bert_data:/bert_data \ - onnxruntime-ubuntu16.04-cuda10.1-cudnn7.6 \ + onnxruntime_e2e_test_image \ /build/RelWithDebInfo/launch_test.py \ --cmd_line_with_args "python orttraining_test_transformers.py BertModelTest.test_for_pretraining_mixed_precision -v" \ --cwd /build/RelWithDebInfo @@ -242,7 +244,7 @@ jobs: --volume $(Build.SourcesDirectory):/onnxruntime_src \ --volume $(Build.BinariesDirectory):/build \ --volume /bert_data:/bert_data \ - onnxruntime-ubuntu16.04-cuda10.1-cudnn7.6 \ + onnxruntime_e2e_test_image \ /build/RelWithDebInfo/launch_test.py \ --cmd_line_with_args "mpirun -n 4 -x NCCL_DEBUG=INFO python orttraining_run_bert_pretrain.py ORTBertPretrainTest.test_pretrain_convergence" \ --cwd /build/RelWithDebInfo diff --git a/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-frontend-test-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-frontend-test-ci-pipeline.yml index 9769337dee4eb..443db8c22fb5b 100644 --- a/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-frontend-test-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-frontend-test-ci-pipeline.yml @@ -26,17 +26,18 @@ jobs: continueOnError: true condition: always() - - script: > - tools/ci_build/github/linux/run_dockerbuild.sh - -o ubuntu16.04 -d gpu -r $(Build.BinariesDirectory) - -x " - --enable_training - --config 
RelWithDebInfo - --skip_onnx_tests - --build_wheel - --enable_training_python_frontend_e2e_tests - --enable_training_pipeline_e2e_tests - " - displayName: 'Build and run frontend tests' + - template: templates/run-docker-build-steps.yml + parameters: + RunDockerBuildArgs: > + -o ubuntu16.04 -d gpu -r $(Build.BinariesDirectory) + -x " + --enable_training + --config RelWithDebInfo + --skip_onnx_tests + --build_wheel + --enable_training_python_frontend_e2e_tests + --enable_training_pipeline_e2e_tests + " + DisplayName: 'Build and run frontend tests' - template: templates/clean-agent-build-directory-step.yml \ No newline at end of file diff --git a/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-perf-test-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-perf-test-ci-pipeline.yml index e17de8350f7e9..9cc888018a0c0 100644 --- a/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-perf-test-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-perf-test-ci-pipeline.yml @@ -13,15 +13,17 @@ jobs: clean: true submodules: recursive - - script: > - tools/ci_build/github/linux/run_dockerbuild.sh - -o ubuntu16.04 -d gpu -r $(Build.BinariesDirectory) - -x " - --config RelWithDebInfo - --enable_training - --update --build - " - displayName: 'Build performance tests' + - template: templates/run-docker-build-steps.yml + parameters: + RunDockerBuildArgs: > + -o ubuntu16.04 -d gpu -r $(Build.BinariesDirectory) + -t onnxruntime_perf_test_image + -x " + --config RelWithDebInfo + --enable_training + --update --build + " + DisplayName: 'Build performance tests' - script: > docker run --gpus all --rm --name onnxruntime-gpu-perf @@ -29,7 +31,7 @@ jobs: --volume $(Build.BinariesDirectory):/build --volume /bert_ort/bert_models:/build/bert_models:ro --volume /bert_data:/build/bert_data:ro - -e NIGHTLY_BUILD onnxruntime-ubuntu16.04-cuda10.1-cudnn7.6 + -e NIGHTLY_BUILD onnxruntime_perf_test_image /usr/bin/python3.6 
/onnxruntime_src/orttraining/tools/ci_test/run_bert_perf_test.py --binary_dir /build/RelWithDebInfo --training_data_root /build/bert_data @@ -42,7 +44,7 @@ jobs: --volume $(Build.BinariesDirectory):/build --volume /bert_ort/gpt2_models:/build/gpt2_models:ro --volume /bert_data/gpt2_data:/build/gpt2_data:ro - -e NIGHTLY_BUILD onnxruntime-ubuntu16.04-cuda10.1-cudnn7.6 + -e NIGHTLY_BUILD onnxruntime_perf_test_image /usr/bin/python3.6 /onnxruntime_src/orttraining/tools/ci_test/run_gpt2_perf_test.py --binary_dir /build/RelWithDebInfo --training_data_root /build/gpt2_data diff --git a/tools/ci_build/github/azure-pipelines/templates/linux-ci.yml b/tools/ci_build/github/azure-pipelines/templates/linux-ci.yml index 512af00c44fd1..fd32e7fd21a0f 100644 --- a/tools/ci_build/github/azure-pipelines/templates/linux-ci.yml +++ b/tools/ci_build/github/azure-pipelines/templates/linux-ci.yml @@ -2,7 +2,7 @@ parameters: AgentPool : 'Linux-CPU' JobName : 'Linux_CI_Dev' SubmoduleCheckoutMode: '' - BuildCommand: 'tools/ci_build/github/linux/run_dockerbuild.sh -o ubuntu16.04 -d cpu -r $(Build.BinariesDirectory) -x "--use_tvm --build_wheel"' + RunDockerBuildArgs: '-o ubuntu16.04 -d cpu -r $(Build.BinariesDirectory) -x "--use_tvm --build_wheel"' DoNodejsPack: 'false' DoNugetPack: 'false' NuPackScript: '' @@ -34,8 +34,9 @@ jobs: - task: NodeTool@0 inputs: versionSpec: '12.16.3' - - script: ${{ parameters.BuildCommand }} - displayName: 'Command Line Script' + - template: run-docker-build-steps.yml + parameters: + RunDockerBuildArgs: '${{ parameters.RunDockerBuildArgs }}' - task: PublishTestResults@2 displayName: 'Publish unit test results' inputs: diff --git a/tools/ci_build/github/azure-pipelines/templates/run-docker-build-steps.yml b/tools/ci_build/github/azure-pipelines/templates/run-docker-build-steps.yml new file mode 100644 index 0000000000000..483a73c279249 --- /dev/null +++ b/tools/ci_build/github/azure-pipelines/templates/run-docker-build-steps.yml @@ -0,0 +1,17 @@ +# calls 
tools/ci_build/github/linux/run_dockerbuild.sh + +parameters: +- name: RunDockerBuildArgs + type: string + default: "" +- name: DisplayName + type: string + default: "Call run_dockerbuild.sh" + +steps: +- template: with-build-docker-image-cache-steps.yml + parameters: + Steps: + - script: | + tools/ci_build/github/linux/run_dockerbuild.sh ${{ parameters.RunDockerBuildArgs }} + displayName: "${{ parameters.DisplayName }}" diff --git a/tools/ci_build/github/azure-pipelines/templates/with-build-docker-image-cache-steps.yml b/tools/ci_build/github/azure-pipelines/templates/with-build-docker-image-cache-steps.yml new file mode 100644 index 0000000000000..2ed3c837eb2f8 --- /dev/null +++ b/tools/ci_build/github/azure-pipelines/templates/with-build-docker-image-cache-steps.yml @@ -0,0 +1,25 @@ +# runs the specified steps while logged in to the build docker image cache +# container registry + +parameters: +- name: Steps + type: stepList + default: [] + +steps: +- task: Docker@2 + inputs: + containerRegistry: 'onnxruntimebuildcache' + command: 'login' + addPipelineData: false + displayName: "Log in to build docker image cache container registry" + +- ${{ parameters.Steps }} + +- task: Docker@2 + inputs: + containerRegistry: 'onnxruntimebuildcache' + command: 'logout' + addPipelineData: false + displayName: "Log out of build docker image cache container registry" + condition: always() diff --git a/tools/ci_build/github/download_test_data.py b/tools/ci_build/github/download_test_data.py index 0be773b51ed65..f59edf3581b1b 100755 --- a/tools/ci_build/github/download_test_data.py +++ b/tools/ci_build/github/download_test_data.py @@ -14,7 +14,7 @@ REPO_DIR = os.path.normpath(os.path.join(SCRIPT_DIR, "..", "..", "..")) sys.path.append(os.path.join(REPO_DIR, "tools", "python")) -from get_azcopy import get_azcopy # noqa: E402 +from util import get_azcopy # noqa: E402 # Hardcoded map of storage account to azure region endpoint diff --git 
a/tools/ci_build/github/linux/run_dockerbuild.sh b/tools/ci_build/github/linux/run_dockerbuild.sh index cbf4e8855451a..b0b70333862e9 100755 --- a/tools/ci_build/github/linux/run_dockerbuild.sh +++ b/tools/ci_build/github/linux/run_dockerbuild.sh @@ -8,7 +8,7 @@ YOCTO_VERSION="4.19" ALLOW_RELEASED_ONNX_OPSET_ONLY_ENV="ALLOW_RELEASED_ONNX_OPSET_ONLY="$ALLOW_RELEASED_ONNX_OPSET_ONLY echo "ALLOW_RELEASED_ONNX_OPSET_ONLY environment variable is set as "$ALLOW_RELEASED_ONNX_OPSET_ONLY_ENV -while getopts c:o:d:r:p:x:a:v:y: parameter_Option +while getopts c:o:d:r:p:x:a:v:y:t: parameter_Option do case "${parameter_Option}" in #android, ubuntu16.04, manylinux2010, ubuntu18.04, CentOS7 @@ -28,6 +28,9 @@ a) BUILD_ARCH=${OPTARG};; v) OPENVINO_VERSION=${OPTARG};; # YOCTO 4.19 + ACL 19.05, YOCTO 4.14 + ACL 19.02 y) YOCTO_VERSION=${OPTARG};; +# an additional name for the resulting docker image (created with "docker tag") +# this is useful for referencing the image outside of this script +t) EXTRA_IMAGE_TAG=${OPTARG};; esac done @@ -35,18 +38,19 @@ EXIT_CODE=1 PYTHON_VER=${PYTHON_VER:=3.6} echo "bo=$BUILD_OS bd=$BUILD_DEVICE bdir=$BUILD_DIR pv=$PYTHON_VER bex=$BUILD_EXTR_PAR" -# If in docker group, call "docker". Otherwise, call "sudo docker". -if id -Gnz | grep -zq "^docker$" ; then - DOCKER_CMD=docker -else - DOCKER_CMD="sudo --preserve-env docker" -fi +DOCKER_IMAGE_CACHE_CONTAINER_REGISTRY_NAME="onnxruntimebuildcache" +COMMON_GET_DOCKER_IMAGE_ARGS="--container-registry ${DOCKER_IMAGE_CACHE_CONTAINER_REGISTRY_NAME}" + +GET_DOCKER_IMAGE_CMD="${SOURCE_ROOT}/tools/ci_build/get_docker_image.py ${COMMON_GET_DOCKER_IMAGE_ARGS}" +DOCKER_CMD="docker" cd $SCRIPT_DIR/docker if [ $BUILD_OS = "android" ]; then IMAGE="android" DOCKER_FILE=Dockerfile.ubuntu_for_android - $DOCKER_CMD build --pull -t "onnxruntime-$IMAGE" --build-arg BUILD_USER=onnxruntimedev --build-arg BUILD_UID=$(id -u) --build-arg PYTHON_VERSION=${PYTHON_VER} -f $DOCKER_FILE . 
+ $GET_DOCKER_IMAGE_CMD --repository "onnxruntime-$IMAGE" \ + --docker-build-args="--build-arg BUILD_USER=onnxruntimedev --build-arg BUILD_UID=$(id -u) --build-arg PYTHON_VERSION=${PYTHON_VER}" \ + --dockerfile $DOCKER_FILE --context . elif [ $BUILD_OS = "manylinux2010" ]; then if [ $BUILD_DEVICE = "gpu" ]; then IMAGE="manylinux2010-cuda10.1" @@ -55,11 +59,15 @@ elif [ $BUILD_OS = "manylinux2010" ]; then IMAGE="manylinux2010" DOCKER_FILE=Dockerfile.manylinux2010 fi - $DOCKER_CMD build --pull -t "onnxruntime-$IMAGE" --build-arg BUILD_USER=onnxruntimedev --build-arg BUILD_UID=$(id -u) --build-arg PYTHON_VERSION=${PYTHON_VER} -f $DOCKER_FILE . + $GET_DOCKER_IMAGE_CMD --repository "onnxruntime-$IMAGE" \ + --docker-build-args="--build-arg BUILD_USER=onnxruntimedev --build-arg BUILD_UID=$(id -u) --build-arg PYTHON_VERSION=${PYTHON_VER}" \ + --dockerfile $DOCKER_FILE --context . elif [ $BUILD_OS = "centos7" ]; then IMAGE="centos7" DOCKER_FILE=Dockerfile.centos - $DOCKER_CMD build --pull -t "onnxruntime-$IMAGE" --build-arg BUILD_USER=onnxruntimedev --build-arg BUILD_UID=$(id -u) --build-arg PYTHON_VERSION=${PYTHON_VER} -f $DOCKER_FILE . + $GET_DOCKER_IMAGE_CMD --repository "onnxruntime-$IMAGE" \ + --docker-build-args="--build-arg BUILD_USER=onnxruntimedev --build-arg BUILD_UID=$(id -u) --build-arg PYTHON_VERSION=${PYTHON_VER}" \ + --dockerfile $DOCKER_FILE --context . elif [ $BUILD_OS = "yocto" ]; then IMAGE="arm-yocto-$YOCTO_VERSION" DOCKER_FILE=Dockerfile.ubuntu_for_arm @@ -68,35 +76,51 @@ elif [ $BUILD_OS = "yocto" ]; then if [ $YOCTO_VERSION = "4.14" ]; then TOOL_CHAIN_SCRIPT=fsl-imx-xwayland-glibc-x86_64-fsl-image-qt5-aarch64-toolchain-4.14-sumo.sh fi - $DOCKER_CMD build -t "onnxruntime-$IMAGE" --build-arg TOOL_CHAIN=$TOOL_CHAIN_SCRIPT --build-arg BUILD_USER=onnxruntimedev --build-arg BUILD_UID=$(id -u) --build-arg PYTHON_VERSION=${PYTHON_VER} -f $DOCKER_FILE . 
+ $GET_DOCKER_IMAGE_CMD --repository "onnxruntime-$IMAGE" \ + --docker-build-args="--build-arg TOOL_CHAIN=$TOOL_CHAIN_SCRIPT --build-arg BUILD_USER=onnxruntimedev --build-arg BUILD_UID=$(id -u) --build-arg PYTHON_VERSION=${PYTHON_VER}" \ + --dockerfile $DOCKER_FILE --context . else if [ $BUILD_DEVICE = "gpu" ]; then IMAGE="$BUILD_OS-$CUDA_VER" DOCKER_FILE=Dockerfile.ubuntu_gpu if [ $CUDA_VER = "cuda9.1-cudnn7.1" ]; then - DOCKER_FILE=Dockerfile.ubuntu_gpu_cuda9 + DOCKER_FILE=Dockerfile.ubuntu_gpu_cuda9 fi - $DOCKER_CMD build --pull -t "onnxruntime-$IMAGE" --build-arg BUILD_USER=onnxruntimedev --build-arg BUILD_UID=$(id -u) --build-arg PYTHON_VERSION=${PYTHON_VER} --build-arg BUILD_EXTR_PAR="${BUILD_EXTR_PAR}" -f $DOCKER_FILE . + $GET_DOCKER_IMAGE_CMD --repository "onnxruntime-$IMAGE" \ + --docker-build-args="--build-arg BUILD_USER=onnxruntimedev --build-arg BUILD_UID=$(id -u) --build-arg PYTHON_VERSION=${PYTHON_VER} --build-arg BUILD_EXTR_PAR=\"${BUILD_EXTR_PAR}\"" \ + --dockerfile $DOCKER_FILE --context . elif [ $BUILD_DEVICE = "tensorrt" ]; then # TensorRT container release 20.07 IMAGE="$BUILD_OS-cuda11.0-cudnn8.0-tensorrt7.1" DOCKER_FILE=Dockerfile.ubuntu_tensorrt - $DOCKER_CMD build --pull -t "onnxruntime-$IMAGE" --build-arg BUILD_USER=onnxruntimedev --build-arg BUILD_UID=$(id -u) --build-arg PYTHON_VERSION=${PYTHON_VER} -f $DOCKER_FILE . + $GET_DOCKER_IMAGE_CMD --repository "onnxruntime-$IMAGE" \ + --docker-build-args="--build-arg BUILD_USER=onnxruntimedev --build-arg BUILD_UID=$(id -u) --build-arg PYTHON_VERSION=${PYTHON_VER}" \ + --dockerfile $DOCKER_FILE --context . elif [ $BUILD_DEVICE = "openvino" ]; then IMAGE="$BUILD_OS-openvino" DOCKER_FILE=Dockerfile.ubuntu_openvino - $DOCKER_CMD build --pull -t "onnxruntime-$IMAGE" --build-arg BUILD_USER=onnxruntimedev --build-arg BUILD_UID=$(id -u) --build-arg PYTHON_VERSION=${PYTHON_VER} --build-arg OPENVINO_VERSION=${OPENVINO_VERSION} -f $DOCKER_FILE . 
+ $GET_DOCKER_IMAGE_CMD --repository "onnxruntime-$IMAGE" \ + --docker-build-args="--build-arg BUILD_USER=onnxruntimedev --build-arg BUILD_UID=$(id -u) --build-arg PYTHON_VERSION=${PYTHON_VER} --build-arg OPENVINO_VERSION=${OPENVINO_VERSION}" \ + --dockerfile $DOCKER_FILE --context . else IMAGE="$BUILD_OS" if [ $BUILD_ARCH = "x86" ]; then IMAGE="$IMAGE.x86" - $DOCKER_CMD build --pull -t "onnxruntime-$IMAGE" --build-arg BUILD_USER=onnxruntimedev --build-arg BUILD_UID=$(id -u) --build-arg PYTHON_VERSION=${PYTHON_VER} -f Dockerfile.ubuntu_x86 . + $GET_DOCKER_IMAGE_CMD --repository "onnxruntime-$IMAGE" \ + --docker-build-args="--build-arg BUILD_USER=onnxruntimedev --build-arg BUILD_UID=$(id -u) --build-arg PYTHON_VERSION=${PYTHON_VER}" \ + --dockerfile Dockerfile.ubuntu_x86 --context . else - $DOCKER_CMD build --pull -t "onnxruntime-$IMAGE" --build-arg BUILD_USER=onnxruntimedev --build-arg BUILD_UID=$(id -u) --build-arg PYTHON_VERSION=${PYTHON_VER} -f Dockerfile.ubuntu . + $GET_DOCKER_IMAGE_CMD --repository "onnxruntime-$IMAGE" \ + --docker-build-args="--build-arg BUILD_USER=onnxruntimedev --build-arg BUILD_UID=$(id -u) --build-arg PYTHON_VERSION=${PYTHON_VER}" \ + --dockerfile Dockerfile.ubuntu --context . 
fi fi fi +if [ -v EXTRA_IMAGE_TAG ]; then + ${DOCKER_CMD} tag "onnxruntime-$IMAGE" "${EXTRA_IMAGE_TAG}" +fi + set +e mkdir -p ~/.cache/onnxruntime mkdir -p ~/.onnx @@ -108,7 +132,7 @@ fi if [ $BUILD_DEVICE = "cpu" ] || [ $BUILD_DEVICE = "ngraph" ] || [ $BUILD_DEVICE = "openvino" ] || [ $BUILD_DEVICE = "nnapi" ] || [ $BUILD_DEVICE = "arm" ]; then RUNTIME= elif [[ $BUILD_EXTR_PAR = *--enable_training_python_frontend_e2e_tests* ]]; then - RUNTIME="--gpus all --shm-size=1024m" + RUNTIME="--gpus all --shm-size=1024m" else RUNTIME="--gpus all" fi diff --git a/tools/ci_build/logger.py b/tools/ci_build/logger.py index 928b978cd1721..c15fad76e329e 100644 --- a/tools/ci_build/logger.py +++ b/tools/ci_build/logger.py @@ -1,11 +1,12 @@ -#!/usr/bin/env python3 # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. import logging -logging.basicConfig( - format="%(asctime)s %(name)s [%(levelname)s] - %(message)s", - level=logging.DEBUG) -log = logging.getLogger("Build") +def get_logger(name): + logging.basicConfig( + format="%(asctime)s %(name)s [%(levelname)s] - %(message)s", + level=logging.DEBUG) + + return logging.getLogger(name) diff --git a/tools/ci_build/op_registration_utils.py b/tools/ci_build/op_registration_utils.py index 79316d45d9ba3..8e7067d578d20 100644 --- a/tools/ci_build/op_registration_utils.py +++ b/tools/ci_build/op_registration_utils.py @@ -9,7 +9,9 @@ import sys import typing -from logger import log +from logger import get_logger + +log = get_logger("op_registration_utils") domain_map = {'': 'kOnnxDomain', 'ai.onnx': 'kOnnxDomain', diff --git a/tools/ci_build/op_registration_validator.py b/tools/ci_build/op_registration_validator.py index 1d1ec26b93545..c424cb97137a2 100644 --- a/tools/ci_build/op_registration_validator.py +++ b/tools/ci_build/op_registration_validator.py @@ -11,7 +11,9 @@ import sys import typing -from logger import log +from logger import get_logger + +log = get_logger("op_registration_validator") # 
deprecated ops where the last registration should have an end version. # value for each entry is the opset when it was deprecated. end version of last registration should equal value - 1. diff --git a/tools/python/util/__init__.py b/tools/python/util/__init__.py new file mode 100644 index 0000000000000..13d064e3cbc27 --- /dev/null +++ b/tools/python/util/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +from .get_azcopy import get_azcopy +from .run import run diff --git a/tools/python/get_azcopy.py b/tools/python/util/get_azcopy.py similarity index 91% rename from tools/python/get_azcopy.py rename to tools/python/util/get_azcopy.py index 520d9b17cf33e..d3cb71431e61c 100644 --- a/tools/python/get_azcopy.py +++ b/tools/python/util/get_azcopy.py @@ -1,4 +1,8 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + import contextlib +import logging import os import platform import re @@ -19,6 +23,8 @@ "Windows": "https://azcopyvnext.azureedge.net/release20200501/azcopy_windows_amd64_10.4.3.zip", } +_log = logging.getLogger("util.get_azcopy") + def _check_version(azcopy_path): proc = subprocess.run( @@ -65,7 +71,7 @@ def get_azcopy(local_azcopy_path="azcopy"): assert len(download_basename) > 0 downloaded_path = os.path.join(temp_dir, download_basename) - print("Downloading azcopy from '{}'...".format(download_url)) + _log.info("Downloading azcopy from '{}'...".format(download_url)) urllib.request.urlretrieve(download_url, downloaded_path) extracted_path = os.path.join(temp_dir, "azcopy") diff --git a/tools/python/util/run.py b/tools/python/util/run.py new file mode 100644 index 0000000000000..5afe6300977f1 --- /dev/null +++ b/tools/python/util/run.py @@ -0,0 +1,43 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+ +import logging +import os +import subprocess + + +_log = logging.getLogger("util.run") + + +def run(*args, cwd=None, capture=False, shell=False, env=None, check=True, + quiet=False): + """Runs a subprocess. + + Args: + *args: The subprocess arguments. + cwd: The working directory. If None, specifies the current directory. + capture: Whether to capture stdout and stderr. + shell: Whether to run using the shell. + env: The environment variables as a dict. If None, inherits the current + environment. + check: Whether to raise an error if the return code is not zero. + quiet: If true, do not print output from the subprocess. + + Returns: + A subprocess.CompletedProcess instance. + """ + cmd = [*args] + + _log.info("Running subprocess in '{0}'\n{1}".format( + cwd or os.getcwd(), cmd)) + + output = \ + subprocess.PIPE if capture else (subprocess.DEVNULL if quiet else None) + completed_process = subprocess.run( + cmd, cwd=cwd, check=check, stdout=output, stderr=output, env=env, + shell=shell) + + _log.debug("Subprocess completed. Return code: {}".format( + completed_process.returncode)) + + return completed_process