Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[civ2][docker/2] build all ray+ray-ml cuda docker images in civ2 #40139

Merged
merged 1 commit into from
Oct 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 42 additions & 5 deletions .buildkite/_forge.rayci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,49 @@ steps:
- name: manylinux
wanda: ci/docker/manylinux.wanda.yaml

- name: raypy38cu118base
wanda: ci/docker/ray.py38.cu118.base.wanda.yaml
- name: raycudabase
label: "wanda: ray.py{{matrix.python}}.cu{{matrix.cuda}}.base"
tags:
- python_dependencies
- docker
- core_cpp
wanda: ci/docker/ray.cuda.base.wanda.yaml
matrix:
setup:
python:
- "3.8"
- "3.9"
- "3.10"
- "3.11"
cuda:
- "11.5.2"
- "11.6.2"
- "11.7.1"
- "11.8.0"
- "12.1.1"
env:
PYTHON_VERSION: "{{matrix.python}}"
CUDA_VERSION: "{{matrix.cuda}}"

- name: ray-mlpy38cu118base
wanda: ci/docker/ray-ml.py38.cu118.base.wanda.yaml
depends_on: raypy38cu118base
- name: ray-mlcudabase
label: "wanda: ray-ml.py{{matrix.python}}.cu{{matrix.cuda}}.base"
tags:
- python_dependencies
- docker
- core_cpp
wanda: ci/docker/ray-ml.cuda.base.wanda.yaml
depends_on: raycudabase
matrix:
setup:
python:
- "3.8"
- "3.9"
- "3.10"
cuda:
- "11.8.0"
env:
PYTHON_VERSION: "{{matrix.python}}"
CUDA_VERSION: "{{matrix.cuda}}"

- name: oss-ci-base_test
wanda: ci/docker/base.test.wanda.yaml
Expand Down
45 changes: 37 additions & 8 deletions .buildkite/build.rayci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,27 +30,56 @@ steps:
depends_on: docbuild
job_env: forge

- label: ":tapioca: build: ray py38 cu118 docker"
- label: ":tapioca: build: ray py{{matrix.python}} cu{{matrix.cuda}} docker"
tags:
- python_dependencies
- docker
- core_cpp
instance_type: medium
commands:
- bazel run //ci/ray_ci:build_in_docker -- docker --python-version 3.8
--platform cu118 --image-type ray
- bazel run //ci/ray_ci:build_in_docker -- docker --python-version {{matrix.python}}
--platform cu{{matrix.cuda}} --image-type ray
depends_on:
- manylinux
- forge
- raypy38cu118base
- raycudabase
job_env: forge
matrix:
setup:
python:
- "3.8"
- "3.9"
- "3.10"
- "3.11"
cuda:
- "11.5.2"
- "11.6.2"
- "11.7.1"
- "11.8.0"
- "12.1.1"

- label: ":tapioca: build: ray-ml py38 cu118 docker"
- label: ":tapioca: build: ray-ml py{{matrix.python}} cu{{matrix.cuda}} docker"
tags:
- python_dependencies
- docker
- core_cpp
instance_type: medium
commands:
- bazel run //ci/ray_ci:build_in_docker -- docker --python-version 3.8
--platform cu118 --image-type ray-ml
- bazel run //ci/ray_ci:build_in_docker -- docker --python-version {{matrix.python}}
--platform cu{{matrix.cuda}} --image-type ray-ml
depends_on:
- manylinux
- forge
- ray-mlpy38cu118base
- ray-mlcudabase
job_env: forge
matrix:
setup:
python:
- "3.8"
- "3.9"
- "3.10"
cuda:
- "11.8.0"

- label: ":tapioca: build: pip-compile dependencies"
instance_type: small
Expand Down
28 changes: 2 additions & 26 deletions .buildkite/pipeline.build_release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,41 +38,17 @@
# Upload to latest directory.
- if [ "$BUILDKITE_BRANCH" == "master" ]; then python .buildkite/copy_files.py --destination wheels --path ./.whl; fi

- label: ":docker: Build Images: {{matrix}} - cpu/cu115/cu116"
- label: ":docker: Build Images: {{matrix}} - cpu"
conditions: ["RAY_CI_PYTHON_DEPENDENCIES_AFFECTED", "RAY_CI_DOCKER_AFFECTED", "RAY_CI_CORE_CPP_AFFECTED"]
instance_size: medium
commands:
- LINUX_WHEELS=1 BUILD_ONE_PYTHON_ONLY={{matrix}} ./ci/ci.sh build
- pip install -q docker aws_requests_auth boto3
- ./ci/env/env_info.sh
- if [[ "${BUILDKITE_PULL_REQUEST}" == "false" ]]; then python .buildkite/copy_files.py --destination docker_login; fi
- python ./ci/build/build-docker-images.py --py-versions {{matrix}} -T cpu -T cu115 -T cu116 --build-type BUILDKITE --build-base
- python ./ci/build/build-docker-images.py --py-versions {{matrix}} -T cpu --build-type BUILDKITE --build-base
matrix:
- py38
- py39
- py310
- py311

- label: ":docker: Build Images: {{matrix}} - cu117/cu118/cu121"
conditions: ["RAY_CI_PYTHON_DEPENDENCIES_AFFECTED", "RAY_CI_DOCKER_AFFECTED", "RAY_CI_CORE_CPP_AFFECTED"]
instance_size: medium
commands:
- LINUX_WHEELS=1 BUILD_ONE_PYTHON_ONLY={{matrix}} ./ci/ci.sh build
- pip install -q docker aws_requests_auth boto3
- ./ci/env/env_info.sh
- if [[ "${BUILDKITE_PULL_REQUEST}" == "false" ]]; then python .buildkite/copy_files.py --destination docker_login; fi
- python ./ci/build/build-docker-images.py --py-versions {{matrix}} -T cu117 -T cu118 -T cu121 --build-type BUILDKITE --build-base
matrix:
- py39
- py310
- py311

- label: ":docker: Build Images: py38 - cu117/cu121"
conditions: ["RAY_CI_PYTHON_DEPENDENCIES_AFFECTED", "RAY_CI_DOCKER_AFFECTED", "RAY_CI_CORE_CPP_AFFECTED"]
instance_size: medium
commands:
- LINUX_WHEELS=1 BUILD_ONE_PYTHON_ONLY=py38 ./ci/ci.sh build
- pip install -q docker aws_requests_auth boto3
- ./ci/env/env_info.sh
- if [[ "${BUILDKITE_PULL_REQUEST}" == "false" ]]; then python .buildkite/copy_files.py --destination docker_login; fi
- python ./ci/build/build-docker-images.py --py-versions py38 -T cu117 -T cu121 --build-type BUILDKITE --build-base
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name: "ray-mlpy38cu118base"
froms: ["cr.ray.io/rayproject/raypy38cu118base"]
name: "ray-ml-py$PYTHON_VERSION-cu$CUDA_VERSION-base"
froms: ["cr.ray.io/rayproject/ray-py$PYTHON_VERSION-cu$CUDA_VERSION-base"]
dockerfile: docker/ray-ml/Dockerfile
srcs:
- python/requirements.txt
Expand All @@ -17,6 +17,6 @@ srcs:
- python/requirements/docker/ray-docker-requirements.txt
- docker/ray-ml/install-ml-docker-requirements.sh
build_args:
- FULL_BASE_IMAGE=cr.ray.io/rayproject/raypy38cu118base
- FULL_BASE_IMAGE=cr.ray.io/rayproject/ray-py$PYTHON_VERSION-cu$CUDA_VERSION-base
tags:
- cr.ray.io/rayproject/ray-mlpy38cu118base
- cr.ray.io/rayproject/ray-ml-py$PYTHON_VERSION-cu$CUDA_VERSION-base
8 changes: 8 additions & 0 deletions ci/docker/ray.cuda.base.wanda.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Wanda spec for the CUDA-enabled Ray base image.
# PYTHON_VERSION and CUDA_VERSION come from the environment; the CI step that
# runs this spec sets both from its build matrix, so one spec covers every
# python/cuda combination.
name: "ray-py$PYTHON_VERSION-cu$CUDA_VERSION-base"
# Must name the same image as BASE_IMAGE below. It was previously pinned to
# 11.8.0, which disagreed with BASE_IMAGE for every other CUDA version in the
# matrix.
# NOTE(review): assumes `froms` expands env vars the same way the ray-ml spec's
# `froms` does - confirm against the wanda docs.
froms: ["nvidia/cuda:$CUDA_VERSION-cudnn8-devel-ubuntu20.04"]
dockerfile: docker/base-deps/Dockerfile
build_args:
# Passed through by name; the value is taken from the environment.
- PYTHON_VERSION
- BASE_IMAGE=nvidia/cuda:$CUDA_VERSION-cudnn8-devel-ubuntu20.04
tags:
- cr.ray.io/rayproject/ray-py$PYTHON_VERSION-cu$CUDA_VERSION-base
8 changes: 0 additions & 8 deletions ci/docker/ray.py38.cu118.base.wanda.yaml

This file was deleted.

4 changes: 2 additions & 2 deletions ci/ray_ci/anyscale_docker_container.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,6 @@ def run(self) -> None:

def _get_requirement_file(self) -> str:
prefix = "requirements" if self.image_type == "ray" else "requirements_ml"
postfix = self.python_version[len("py3") :]
postfix = self.python_version

return f"{prefix}_byod_3.{postfix}.txt"
return f"{prefix}_byod_{postfix}.txt"
2 changes: 1 addition & 1 deletion ci/ray_ci/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
)
@click.option(
"--platform",
default="cu118",
default="cu11.8.0",
type=click.Choice(list(PLATFORM)),
help=("Platform to build the docker with"),
)
Expand Down
28 changes: 22 additions & 6 deletions ci/ray_ci/docker_container.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,16 @@
from ci.ray_ci.container import Container


PLATFORM = ["cu118"]
GPU_PLATFORM = "cu118"
DEFAULT_PYTHON_VERSION = "py38"
PLATFORM = [
"cpu",
"cu11.5.2",
"cu11.6.2",
"cu11.7.1",
"cu11.8.0",
"cu12.1.1",
]
GPU_PLATFORM = "cu11.8.0"
DEFAULT_PYTHON_VERSION = "3.8"


class DockerContainer(Container):
Expand All @@ -17,7 +24,7 @@ class DockerContainer(Container):
def __init__(self, python_version: str, platform: str, image_type: str) -> None:
assert "RAYCI_CHECKOUT_DIR" in os.environ, "RAYCI_CHECKOUT_DIR not set"
rayci_checkout_dir = os.environ["RAYCI_CHECKOUT_DIR"]
self.python_version = f"py{python_version.replace('.', '')}" # 3.8 -> py38
self.python_version = python_version
self.platform = platform
self.image_type = image_type

Expand Down Expand Up @@ -49,13 +56,22 @@ def _get_canonical_tag(self) -> str:
# e.g. sha-pyversion-platform
return self._get_image_tags()[0]

def get_python_version_tag(self) -> str:
return f"-py{self.python_version.replace('.', '')}" # 3.8 -> py38

def get_platform_tag(self) -> str:
if self.platform == "cpu":
return "-cpu"
versions = self.platform.split(".")
return f"-{versions[0]}{versions[1]}" # cu11.8.0 -> cu118
Comment on lines +62 to +66
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(lonnie bot speaking) Could you add a unit test for this function?


def _get_image_tags(self) -> List[str]:
# An image tag is composed by ray version tag, python version and platform.
# See https://docs.ray.io/en/latest/ray-overview/installation.html for
# more information on the image tags.
versions = self._get_image_version_tags()

platforms = [f"-{self.platform}"]
platforms = [self.get_platform_tag()]
if self.platform == "cpu" and self.image_type == "ray":
# no tag is alias to cpu for ray image
platforms.append("")
Expand All @@ -66,7 +82,7 @@ def _get_image_tags(self) -> List[str]:
# no tag is alias to gpu for ray-ml image
platforms.append("")

py_versions = [f"-{self.python_version}"]
py_versions = [self.get_python_version_tag()]
if self.python_version == DEFAULT_PYTHON_VERSION:
py_versions.append("")

Expand Down
6 changes: 3 additions & 3 deletions ci/ray_ci/ray_docker_container.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,12 @@ def run(self) -> None:

base_image = (
f"{_DOCKER_ECR_REPO}:{rayci_build_id}"
f"-{self.image_type}{self.python_version}{self.platform}base"
f"-{self.image_type}-py{self.python_version}-{self.platform}-base"
)

docker_pull(base_image)

bin_path_py_version = f"3.{self.python_version[3:]}" # py38 -> 3.8
bin_path = PYTHON_VERSIONS[bin_path_py_version]["bin_path"]
bin_path = PYTHON_VERSIONS[self.python_version]["bin_path"]
wheel_name = f"ray-{RAY_VERSION}-{bin_path}-manylinux2014_x86_64.whl"

constraints_file = "requirements_compiled.txt"
Expand Down
2 changes: 1 addition & 1 deletion ci/ray_ci/test_anyscale_docker_container.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def _mock_run_script(input: List[str]) -> None:
"ci.ray_ci.docker_container.Container.run_script",
side_effect=_mock_run_script,
):
container = AnyscaleDockerContainer("3.8", "cu118", "ray")
container = AnyscaleDockerContainer("3.8", "cu11.8.0", "ray")
container.run()
cmd = self.cmds[-1]
assert cmd == (
Expand Down
21 changes: 16 additions & 5 deletions ci/ray_ci/test_ray_docker_container.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,13 @@ def _mock_run_script(input: List[str]) -> None:
"ci.ray_ci.docker_container.Container.run_script",
side_effect=_mock_run_script,
):
container = RayDockerContainer("3.8", "cu118", "ray")
container = RayDockerContainer("3.8", "cu11.8.0", "ray")
container.run()
cmd = self.cmds[-1]
assert cmd == (
"./ci/build/build-ray-docker.sh "
f"ray-{RAY_VERSION}-cp38-cp38-manylinux2014_x86_64.whl "
f"{_DOCKER_ECR_REPO}:123-raypy38cu118base "
f"{_DOCKER_ECR_REPO}:123-ray-py3.8-cu11.8.0-base "
"requirements_compiled.txt "
"rayproject/ray:123456-py38-cu118"
)
Expand All @@ -41,7 +41,7 @@ def _mock_run_script(input: List[str]) -> None:
assert cmd == (
"./ci/build/build-ray-docker.sh "
f"ray-{RAY_VERSION}-cp39-cp39-manylinux2014_x86_64.whl "
f"{_DOCKER_ECR_REPO}:123-ray-mlpy39cpubase "
f"{_DOCKER_ECR_REPO}:123-ray-ml-py3.9-cpu-base "
"requirements_compiled.txt "
"rayproject/ray-ml:123456-py39-cpu"
)
Expand All @@ -50,7 +50,7 @@ def test_canonical_tag(self) -> None:
container = RayDockerContainer("3.8", "cpu", "ray")
assert container._get_canonical_tag() == "123456-py38-cpu"

container = RayDockerContainer("3.8", "cu118", "ray-ml")
container = RayDockerContainer("3.8", "cu11.8.0", "ray-ml")
assert container._get_canonical_tag() == "123456-py38-cu118"

with mock.patch.dict(os.environ, {"BUILDKITE_BRANCH": "releases/1.0.0"}):
Expand Down Expand Up @@ -85,7 +85,7 @@ def test_get_image_name(self) -> None:
"rayproject/ray:nightly",
]

container = RayDockerContainer("3.9", "cu118", "ray-ml")
container = RayDockerContainer("3.9", "cu11.8.0", "ray-ml")
assert container._get_image_names() == [
"rayproject/ray-ml:123456-py39-cu118",
"rayproject/ray-ml:123456-py39-gpu",
Expand All @@ -104,6 +104,17 @@ def test_get_image_name(self) -> None:
"rayproject/ray:1.0.0.123456",
]

def test_get_python_version_tag(self) -> None:
container = RayDockerContainer("3.8", "cpu", "ray")
assert container.get_python_version_tag() == "-py38"

def test_get_platform_tag(self) -> None:
container = RayDockerContainer("3.8", "cpu", "ray")
assert container.get_platform_tag() == "-cpu"

container = RayDockerContainer("3.8", "cu11.8.0", "ray")
assert container.get_platform_tag() == "-cu118"


if __name__ == "__main__":
sys.exit(pytest.main(["-v", __file__]))