Cache build docker images in container registry. (microsoft#5811)
This PR adds infrastructure to automatically cache docker images used in CI builds in a container registry.

Currently, build images are pulled from a container registry for some builds and rebuilt from scratch for others. The container registry requires maintenance to keep the images up to date, and building images on every run wastes build agent resources.

With this change, a given build image is first looked up in a cache container registry: if it is present, it is pulled; otherwise it is built locally and pushed. The uniqueness of a build image is determined by a hash digest of the Dockerfile, the docker build context directory, and certain "docker build" options. This digest forms part of the image tag in the cache container repository.

The cache container registry will need to be cleaned up periodically. This is not automated yet.
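
As a rough illustration of the tagging scheme described above, here is a minimal sketch (assumed inputs only; the real logic is in the new tools/ci_build/get_docker_image.py in this diff, which also hashes every file and directory in the build context, and the registry/repository names here are hypothetical):

import hashlib

# Hypothetical inputs; the actual script also walks the build context
# directory, folding file paths, modes, and contents into the digest.
docker_build_args = "--build-arg PYTHON_VERSION=3.6"
with open("Dockerfile", "rb") as dockerfile:
    dockerfile_bytes = dockerfile.read()

hash_obj = hashlib.sha256()
hash_obj.update(docker_build_args.encode())
hash_obj.update(dockerfile_bytes)

tag = "image_content_digest_{}".format(hash_obj.hexdigest())
full_image_name = "myregistry.azurecr.io/mybuildimage:{}".format(tag)

# If an image with this tag already exists in the cache registry, it is
# pulled; otherwise it is built locally and pushed under this name.
print(full_image_name)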
edgchen1 authored Nov 18, 2020
1 parent 252dbf1 commit 71e7c2b
Showing 26 changed files with 429 additions and 115 deletions.
@@ -17,7 +17,7 @@

sys.path.append(os.path.join(REPO_DIR, "tools", "python"))

import get_azcopy # noqa: E402
from util import get_azcopy # noqa: E402

def _download(azcopy_path, url, local_path):
subprocess.run([azcopy_path, "cp", "--log-level", "NONE", url, local_path], check=True)
@@ -39,19 +39,23 @@ def _check_file_sha256_digest(path, expected_digest):
match = actual_digest.lower() == expected_digest.lower()
if not match:
raise RuntimeError(
"SHA256 digest mismatch, expected: {}, actual: {}".format(expected_digest.lower(), actual_digest.lower()))
"SHA256 digest mismatch, expected: {}, actual: {}".format(
expected_digest.lower(), actual_digest.lower()))

def main():
parser = argparse.ArgumentParser(description="Downloads training end-to-end test data.")
parser.add_argument("--azure_blob_url", required=True, help="The test data destination directory.")
parser.add_argument("--target_dir", required=True, help="The test data destination directory.")
parser.add_argument("--archive_sha256_digest", help="The test data destination directory.")
parser = argparse.ArgumentParser(
description="Downloads an Azure blob archive.")
parser.add_argument("--azure_blob_url", required=True,
help="The Azure blob URL.")
parser.add_argument("--target_dir", required=True,
help="The destination directory.")
parser.add_argument("--archive_sha256_digest",
help="The SHA256 digest of the archive. Verified if provided.")
args = parser.parse_args()

with tempfile.TemporaryDirectory() as temp_dir, \
get_azcopy.get_azcopy() as azcopy_path:
with tempfile.TemporaryDirectory() as temp_dir, get_azcopy() as azcopy_path:
archive_path = os.path.join(temp_dir, "archive.zip")
print("Downloading E2E test data from '{}'...".format(args.azure_blob_url))
print("Downloading archive from '{}'...".format(args.azure_blob_url))
_download(azcopy_path, args.azure_blob_url, archive_path)
if args.archive_sha256_digest:
_check_file_sha256_digest(archive_path, args.archive_sha256_digest)
35 changes: 21 additions & 14 deletions tools/ci_build/build.py
@@ -10,10 +10,22 @@
import subprocess
import sys
import hashlib
from logger import log
from logger import get_logger
from amd_hipify import amd_hipify


SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
REPO_DIR = os.path.normpath(os.path.join(SCRIPT_DIR, "..", ".."))

sys.path.append(os.path.join(REPO_DIR, "tools", "python"))


from util import run # noqa: E402


log = get_logger("build")


class BaseError(Exception):
"""Base class for errors originating from build.py."""
pass
@@ -490,8 +502,9 @@ def get_config_build_dir(build_dir, config):

def run_subprocess(args, cwd=None, capture=False, dll_path=None,
shell=False, env={}):
log.info("Running subprocess in '{0}'\n{1}".format(
cwd or os.getcwd(), args))
if isinstance(args, str):
raise ValueError("args should be a sequence of strings, not a string")

my_env = os.environ.copy()
if dll_path:
if is_windows():
@@ -502,15 +515,9 @@ def run_subprocess(args, cwd=None, capture=False, dll_path=None,
else:
my_env["LD_LIBRARY_PATH"] = dll_path

stdout, stderr = (subprocess.PIPE, subprocess.STDOUT) if capture else (
None, None)
my_env.update(env)
completed_process = subprocess.run(
args, cwd=cwd, check=True, stdout=stdout, stderr=stderr,
env=my_env, shell=shell)
log.debug("Subprocess completed. Return code=" +
str(completed_process.returncode))
return completed_process

return run(*args, cwd=cwd, capture=capture, shell=shell, env=my_env)


def update_submodules(source_dir):
@@ -925,7 +932,7 @@ def generate_build_tree(cmake_path, source_dir, build_dir, cuda_home, cudnn_home
args.cmake_generator == 'Visual Studio 16 2019' and
args.use_full_protobuf):
raise BuildError(
"Fuzz test has only be tested with build shared libs option using MSVC on windows")
"Fuzz test has only be tested with build shared libs option using MSVC on windows")
cmake_args += [
"-Donnxruntime_BUILD_UNIT_TESTS=ON",
"-Donnxruntime_FUZZ_TEST=ON",
@@ -1163,9 +1170,9 @@ def adb_shell(*args, **kwargs):

def run_android_tests(args, source_dir, config, cwd):
if args.android_abi == 'x86_64':
run_subprocess(os.path.join(
run_subprocess([os.path.join(
source_dir, 'tools', 'ci_build', 'github', 'android',
'start_android_emulator.sh'))
'start_android_emulator.sh')])
adb_push('testdata', '/data/local/tmp/', cwd=cwd)
adb_push(
os.path.join(source_dir, 'cmake', 'external', 'onnx', 'onnx', 'backend', 'test'),
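
Note that run_subprocess in build.py now rejects a plain command string and forwards to the shared util.run helper, which is why call sites such as the start_android_emulator.sh invocation above were changed to pass a list. A minimal sketch of the calling convention (hypothetical call site and path, as it would appear inside tools/ci_build/build.py):

# Hypothetical call site inside tools/ci_build/build.py.
build_dir = "build/Linux/Debug"  # hypothetical path

# Correct: a sequence of strings, as the new isinstance check requires.
run_subprocess(["cmake", "--build", build_dir, "--config", "Debug"])

# Incorrect: a single command string now raises
#     ValueError: args should be a sequence of strings, not a string
# run_subprocess("cmake --build " + build_dir)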
5 changes: 4 additions & 1 deletion tools/ci_build/exclude_unused_ops.py
@@ -14,7 +14,10 @@
import typing

from onnx import AttributeProto
from logger import log
from logger import get_logger


log = get_logger("exclude_unused_ops")


def _extract_ops_from_config(file_path, required_ops):
166 changes: 166 additions & 0 deletions tools/ci_build/get_docker_image.py
@@ -0,0 +1,166 @@
#!/usr/bin/env python3
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

import argparse
import collections
import hashlib
import os
import shlex
import sys
from logger import get_logger


SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
REPO_DIR = os.path.normpath(os.path.join(SCRIPT_DIR, "..", ".."))

sys.path.append(os.path.join(REPO_DIR, "tools", "python"))


from util import run # noqa: E402


log = get_logger("get_docker_image")


def parse_args():
parser = argparse.ArgumentParser(
description="Gets a docker image, either by pulling it from a "
"container registry or building it locally and then pushing it. "
"The uniqueness of the docker image is determined by a hash digest of "
"the Dockerfile, the build context directory, and arguments to "
"'docker build' affecting the image content. "
"This digest value is used in the image tag. "
"This script checks whether an image with that tag is initially "
"present in the container registry to determine whether to pull or "
"build the image. "
"The user must be logged in to the container registry.")

parser.add_argument(
"--dockerfile", default="Dockerfile", help="Path to the Dockerfile.")
parser.add_argument(
"--context", default=".", help="Path to the build context.")
parser.add_argument(
"--docker-build-args", default="",
help="String of Docker build args which may affect the image content. "
"These will be used in differentiating images from one another. "
"For example, '--build-arg'.")
parser.add_argument(
"--docker-build-args-not-affecting-image-content", default="",
help="String of Docker build args which do not affect the image "
"content.")

parser.add_argument(
"--container-registry", required=True,
help="The Azure container registry name.")
parser.add_argument(
"--repository", required=True, help="The image repository name.")

parser.add_argument(
"--docker-path", default="docker", help="Path to docker.")

return parser.parse_args()


FileInfo = collections.namedtuple('FileInfo', ['path', 'mode'])


def file_info_str(file_info: FileInfo):
return "{} {}".format(file_info.path, file_info.mode)


def make_file_info_from_path(file_path: str):
return FileInfo(file_path, os.stat(file_path).st_mode)


def update_hash_with_directory(dir_file_info: FileInfo, hash_obj):
hash_obj.update(file_info_str(dir_file_info).encode())

files, dirs = [], []
with os.scandir(dir_file_info.path) as dir_it:
for dir_entry in dir_it:
file_info = FileInfo(dir_entry.path, dir_entry.stat().st_mode)
if dir_entry.is_dir():
dirs.append(file_info)
elif dir_entry.is_file():
files.append(file_info)

def file_info_key(file_info: FileInfo):
return file_info.path

files.sort(key=file_info_key)
dirs.sort(key=file_info_key)

for file_info in files:
update_hash_with_file(file_info, hash_obj)

for file_info in dirs:
update_hash_with_directory(file_info, hash_obj)


def update_hash_with_file(file_info: FileInfo, hash_obj):
hash_obj.update(file_info_str(file_info).encode())

read_bytes_length = 8192
with open(file_info.path, mode="rb") as file_data:
while True:
read_bytes = file_data.read(read_bytes_length)
if len(read_bytes) == 0:
break
hash_obj.update(read_bytes)


def generate_tag(dockerfile_path, context_path, docker_build_args_str):
hash_obj = hashlib.sha256()
hash_obj.update(docker_build_args_str.encode())
update_hash_with_file(
make_file_info_from_path(dockerfile_path), hash_obj)
update_hash_with_directory(
make_file_info_from_path(context_path), hash_obj)
return "image_content_digest_{}".format(hash_obj.hexdigest())


def container_registry_has_image(full_image_name, docker_path):
env = os.environ.copy()
env["DOCKER_CLI_EXPERIMENTAL"] = "enabled" # needed for "docker manifest"
proc = run(
docker_path, "manifest", "inspect", "--insecure", full_image_name,
env=env, check=False, quiet=True)
return proc.returncode == 0


def main():
args = parse_args()

tag = generate_tag(args.dockerfile, args.context, args.docker_build_args)

full_image_name = "{}.azurecr.io/{}:{}".format(
args.container_registry, args.repository, tag)

log.info("Image: {}".format(full_image_name))

if container_registry_has_image(full_image_name, args.docker_path):
log.info("Image found, pulling...")

run(args.docker_path, "pull", full_image_name)
else:
log.info("Image not found, building and pushing...")

run(args.docker_path, "build",
"--pull",
*shlex.split(args.docker_build_args),
*shlex.split(args.docker_build_args_not_affecting_image_content),
"--tag", full_image_name,
"--file", args.dockerfile,
args.context)

run(args.docker_path, "push", full_image_name)

# tag so we can refer to the image by repository name
run(args.docker_path, "tag", full_image_name, args.repository)

return 0


if __name__ == "__main__":
sys.exit(main())
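
A hypothetical invocation of the new script, wrapped in Python for illustration (the paths, registry, and repository names are made up; the flags are the ones defined in parse_args above, and the user must already be logged in to the registry). Note that --docker-build-args is a single shell-style string that the script splits with shlex.split:

# Hypothetical wrapper around get_docker_image.py; names and paths are examples only.
import shlex
import subprocess

subprocess.run(
    ["python3", "tools/ci_build/get_docker_image.py",
     "--dockerfile", "tools/ci_build/github/linux/docker/Dockerfile.example",  # hypothetical
     "--context", "tools/ci_build/github/linux/docker",                        # hypothetical
     "--docker-build-args", "--build-arg BUILD_UID=1000",
     "--container-registry", "mycacheregistry",
     "--repository", "onnxruntime-example-build"],
    check=True)

# The build-args string becomes separate 'docker build' arguments:
print(shlex.split("--build-arg BUILD_UID=1000"))  # ['--build-arg', 'BUILD_UID=1000']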
@@ -7,10 +7,10 @@ jobs:
steps:
- template: templates/set-test-data-variables-step.yml

# Latest TensorRT container only supports ubuntu18.04
- script: 'tools/ci_build/github/linux/run_dockerbuild.sh -o ubuntu18.04 -d tensorrt -r $(Build.BinariesDirectory) -p 3.6 -x "--build_wheel"'

displayName: 'Command Line Script'
- template: templates/run-docker-build-steps.yml
parameters:
# Latest TensorRT container only supports ubuntu18.04
RunDockerBuildArgs: '-o ubuntu18.04 -d tensorrt -r $(Build.BinariesDirectory) -p 3.6 -x "--build_wheel"'

- template: templates/component-governance-component-detection-steps.yml
parameters :
@@ -4,6 +4,6 @@ jobs:
AgentPool : 'Linux-Multi-GPU'
JobName: 'Linux_CI_Multi_GPU_TensorRT_Dev'
# The latest TensorRT container only supports ubuntu18.04
BuildCommand: 'tools/ci_build/github/linux/run_dockerbuild.sh -o ubuntu18.04 -d tensorrt -r $(Build.BinariesDirectory) -p 3.6 -x "--enable_multi_device_test"'
RunDockerBuildArgs: '-o ubuntu18.04 -d tensorrt -r $(Build.BinariesDirectory) -p 3.6 -x "--enable_multi_device_test"'
DoNugetPack: 'false'
ArtifactName: 'drop-linux'
@@ -15,8 +15,9 @@ jobs:
continueOnError: true
condition: always()

- script: 'tools/ci_build/github/linux/run_dockerbuild.sh -o ubuntu16.04 -d ngraph -r $(Build.BinariesDirectory) -x "--use_ngraph --build_wheel"'
displayName: 'Command Line Script'
- template: templates/run-docker-build-steps.yml
parameters:
RunDockerBuildArgs: '-o ubuntu16.04 -d ngraph -r $(Build.BinariesDirectory) -x "--use_ngraph --build_wheel"'

- template: templates/component-governance-component-detection-steps.yml
parameters :
@@ -3,7 +3,7 @@ jobs:
parameters:
AgentPool : 'Linux-CPU'
JobName: 'Linux_CI_Dev'
BuildCommand: 'tools/ci_build/github/linux/run_dockerbuild.sh -o ubuntu18.04 -d cpu -r $(Build.BinariesDirectory) -x "--enable_pybind --use_nuphar"'
RunDockerBuildArgs: '-o ubuntu18.04 -d cpu -r $(Build.BinariesDirectory) -x "--enable_pybind --use_nuphar"'
DoNugetPack: 'false'
ArtifactName: 'drop-linux'
TimeoutInMinutes: 180
@@ -3,7 +3,7 @@ jobs:
parameters:
AgentPool : 'Linux-CPU'
JobName: 'Linux_CI_Dev'
BuildCommand: 'tools/ci_build/github/linux/run_dockerbuild.sh -o ubuntu18.04 -d openvino -v 2021.1 -r $(Build.BinariesDirectory) -x "--use_openvino CPU_FP32 --build_wheel"'
RunDockerBuildArgs: '-o ubuntu18.04 -d openvino -v 2021.1 -r $(Build.BinariesDirectory) -x "--use_openvino CPU_FP32 --build_wheel"'
DoNugetPack: 'false'
ArtifactName: 'drop-linux'
TimeoutInMinutes: 120
@@ -20,9 +20,10 @@ jobs:
arguments: --build_dir $(Build.BinariesDirectory) --edge_device
pythonInterpreter: '/usr/bin/python3'
workingDirectory: $(Build.BinariesDirectory)

- script: 'tools/ci_build/github/linux/run_dockerbuild.sh -o ubuntu16.04 -d openvino -v 2020.2 -r $(Build.BinariesDirectory) -x "--use_openvino GPU_FP32 --build_wheel"'
displayName: 'Command Line Script'

- template: templates/run-docker-build-steps.yml
parameters:
RunDockerBuildArgs: '-o ubuntu16.04 -d openvino -v 2020.2 -r $(Build.BinariesDirectory) -x "--use_openvino GPU_FP32 --build_wheel"'

- template: templates/component-governance-component-detection-steps.yml
parameters :
@@ -6,8 +6,7 @@ jobs:
AgentPool : 'Linux-Single-GPU-V100'
JobName: 'Onnxruntime_Linux_GPU_Training'
SubmoduleCheckoutMode: 'recursive'
BuildCommand: >
tools/ci_build/github/linux/run_dockerbuild.sh
RunDockerBuildArgs: >
-o ubuntu16.04 -d gpu -r $(Build.BinariesDirectory)
-x "
--enable_training
