diff --git a/.github/actions/setup_env_vars/action.yml b/.github/actions/setup_env_vars/action.yml
new file mode 100644
index 0000000000..fb1aae452e
--- /dev/null
+++ b/.github/actions/setup_env_vars/action.yml
@@ -0,0 +1,59 @@
+name: "Setup Env Vars"
+
+runs:
+  using: "composite"
+  steps:
+    - name: Set Env Vars (Push Event)
+      if: ${{ github.event_name == 'push' }}
+      shell: bash
+      env:
+        SHA: ${{ github.sha }}
+        git_repo_full: ${{ github.repository }}
+        pr_number: ""
+      run: |
+        short_sha=$(git rev-parse --short "$SHA")
+        echo "SHORT_SHA=$short_sha" >> $GITHUB_ENV
+        echo "PR_NUMBER=$pr_number" >> $GITHUB_ENV
+
+        target_branch=${GITHUB_REF##*/}
+        echo "TARGET_BRANCH=$target_branch" >> $GITHUB_ENV
+
+        repo_name=${git_repo_full##*/}
+        echo "REPO_NAME=$repo_name" >> $GITHUB_ENV
+
+        IFS='-' read -ra name_parts <<< "$repo_name"
+        echo "LANG=${name_parts[1]}" >> $GITHUB_ENV
+
+        task="${repo_name}-${target_branch}"
+        echo "TASK=$task" >> $GITHUB_ENV
+
+        job_name=${repo_name}/${target_branch}
+        echo "JOB_NAME=$job_name" >> $GITHUB_ENV
+
+    - name: Set Env Vars (Pull Request Event)
+      if: ${{ github.event_name == 'pull_request_target' }}
+      shell: bash
+      env:
+        SHA: ${{ github.event.pull_request.head.sha }}
+        target_branch: ${{ github.event.pull_request.base.ref }}
+        git_repo_full: ${{ github.event.pull_request.base.repo.full_name }}
+        pr_number: PR-${{ github.event.number }}
+      run: |
+        short_sha=$(git rev-parse --short "$SHA")
+        echo "SHORT_SHA=$short_sha" >> $GITHUB_ENV
+        echo "PR_NUMBER=$pr_number" >> $GITHUB_ENV
+
+        echo "TARGET_BRANCH=$target_branch" >> $GITHUB_ENV
+
+        repo_name=${git_repo_full##*/}
+        echo "REPO_NAME=$repo_name" >> $GITHUB_ENV
+
+        IFS='-' read -ra name_parts <<< "$repo_name"
+        echo "LANG=${name_parts[1]}" >> $GITHUB_ENV
+
+        task="${repo_name}-${target_branch}"
+        echo "TASK=$task" >> $GITHUB_ENV
+
+        job_name=${repo_name}/${pr_number}/${short_sha}
+        echo "JOB_NAME=$job_name" >> $GITHUB_ENV
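For reference, a push to `master` of `d2l-ai/d2l-en` would produce roughly the following values; this is an illustrative sketch, and the short SHA is only an example taken from the job-name samples later in this patch:

```bash
# Hypothetical values derived by the composite action for a push event
SHORT_SHA=21be1a4        # git rev-parse --short "$GITHUB_SHA"
PR_NUMBER=               # empty for push events
TARGET_BRANCH=master     # ${GITHUB_REF##*/}
REPO_NAME=d2l-en         # repository name without the owner
LANG=en                  # second '-'-separated component of the repo name
TASK=d2l-en-master
JOB_NAME=d2l-en/master   # for PRs: d2l-en/PR-<number>/<short-sha>
```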
diff --git a/.github/actions/submit-job/action.yml b/.github/actions/submit-job/action.yml
new file mode 100644
index 0000000000..211d45a678
--- /dev/null
+++ b/.github/actions/submit-job/action.yml
@@ -0,0 +1,91 @@
+name: "Submit Job to AWS Batch"
+inputs:
+  job-type:
+    required: true
+  job-name:
+    required: true
+  work-dir:
+    required: false
+    default: .
+  command:
+    required: true
+
+runs:
+  using: "composite"
+  steps:
+    - name: Configure AWS Credentials
+      uses: aws-actions/configure-aws-credentials@v2
+      with:
+        role-to-assume: arn:aws:iam::650140442593:role/D2L_CI_Batch
+        role-duration-seconds: 14400  # this requires changing max session duration to 4hrs in AWS Console for D2L_CI_Batch
+        aws-region: us-west-2
+
+    - name: Install dependencies
+      shell: bash
+      run: |
+        pip install boto3
+
+    - name: Check for Actor Permissions
+      id: check
+      continue-on-error: true
+      uses: prince-chrismc/check-actor-permissions-action@v2
+      with:
+        github_token: ${{ github.token }}
+        permission: write
+
+    - name: Submit Job (For Push on Development Branches)
+      if: ${{ github.event_name == 'push' && github.ref != 'refs/heads/release' && github.ref != 'refs/heads/classic' }}
+      shell: bash
+      run: |
+        echo "Start submitting job for a Push Event on a Development Branch"
+        # Add "-push" for all these jobs to use elevated push level job-def permissions
+        python ./ci/submit-job.py --job-type ${{ inputs.job-type }}-push \
+          --name ${{ inputs.job-name }}-'${{ github.ref }}' \
+          --source-ref '${{ github.ref }}' \
+          --work-dir ${{ inputs.work-dir }} \
+          --remote https://github.com/'${{ github.repository }}' \
+          --command "${{ inputs.command }}" \
+          --safe-to-use-script \
+          --wait
+
+    - name: Submit Job (For Push on Release/Classic)
+      if: ${{ github.event_name == 'push' && (github.ref == 'refs/heads/release' || github.ref == 'refs/heads/classic') }}
+      shell: bash
+      run: |
+        echo "Start submitting job for a Push Event on Release/Classic Branch"
+        # Add "-release" for all these jobs to use elevated release level job-def permissions
+        python ./ci/submit-job.py --job-type ${{ inputs.job-type }}-release \
+          --name ${{ inputs.job-name }}-'${{ github.ref }}' \
+          --source-ref '${{ github.ref }}' \
+          --work-dir ${{ inputs.work-dir }} \
+          --remote https://github.com/'${{ github.repository }}' \
+          --command "${{ inputs.command }}" \
+          --safe-to-use-script \
+          --wait
+
+    - name: Submit Job (For Pull Request Safe Scripts)
+      if: ${{ github.event_name == 'pull_request_target' && steps.check.outputs.permitted == 'true' }}
+      shell: bash
+      run: |
+        echo "Start submitting job for a Pull Request with trusted scripts"
+        python ./ci/submit-job.py --job-type ${{ inputs.job-type }} \
+          --name ${{ inputs.job-name }}-PR#'${{ github.event.number }}' \
+          --source-ref '${{ github.event.pull_request.head.sha }}' \
+          --work-dir ${{ inputs.work-dir }} \
+          --remote https://github.com/'${{ github.event.pull_request.head.repo.full_name }}' \
+          --command "${{ inputs.command }}" \
+          --safe-to-use-script \
+          --wait
+
+    - name: Submit Job (For Pull Request Not Safe Scripts)
+      if: ${{ github.event_name == 'pull_request_target' && steps.check.outputs.permitted != 'true' }}
+      shell: bash
+      run: |
+        echo "Start submitting job for a Pull Request with untrusted scripts"
+        python ./ci/submit-job.py --job-type ${{ inputs.job-type }} \
+          --name ${{ inputs.job-name }}-PR#'${{ github.event.number }}' \
+          --source-ref '${{ github.event.pull_request.head.sha }}' \
+          --work-dir ${{ inputs.work-dir }} \
+          --remote https://github.com/'${{ github.event.pull_request.head.repo.full_name }}' \
+          --command "${{ inputs.command }}" \
+          --wait
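The only difference between the two pull-request paths above is the `--safe-to-use-script` flag; when it is absent, the Batch-side entrypoint (`d2l_job.sh`, added later in this patch) discards the PR's workflow scripts and substitutes the copies from the default branch. A sketch of the two resulting submissions, assuming the example PR number 2453 used elsewhere in this patch and placeholder values in angle brackets:

```bash
# Actor has write permission: the PR's own workflow scripts are trusted
python ./ci/submit-job.py --job-type ci-gpu-torch --name D2L-Build-PyTorch-PR#2453 \
    --source-ref <head-sha> --remote https://github.com/<fork>/d2l-en \
    --command "<build command>" --safe-to-use-script --wait

# Actor lacks write permission: same call without the flag, so the job
# runs the scripts from the upstream default branch instead
python ./ci/submit-job.py --job-type ci-gpu-torch --name D2L-Build-PyTorch-PR#2453 \
    --source-ref <head-sha> --remote https://github.com/<fork>/d2l-en \
    --command "<build command>" --wait
```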
diff --git a/.github/workflow_scripts/build_and_deploy.sh b/.github/workflow_scripts/build_and_deploy.sh
new file mode 100755
index 0000000000..4398205e8d
--- /dev/null
+++ b/.github/workflow_scripts/build_and_deploy.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+
+# All HTML/PDF build and deployment steps live in this script
+
+set -ex
+
+REPO_NAME="$1"      # E.g. 'd2l-en'
+TARGET_BRANCH="$2"  # E.g. 'master'; also the base branch when building a PR
+JOB_NAME="$3"       # E.g. 'd2l-en/master' or 'd2l-en/PR-2453/21be1a4'
+LANG="$4"           # E.g. 'en', 'zh', etc.
+
+pip3 install .
+mkdir -p _build
+
+# Restore the build cache from S3
+aws s3 sync s3://preview.d2l.ai/ci_cache/"$REPO_NAME"-"$TARGET_BRANCH"/_build _build --delete --quiet --exclude 'eval*/data/*'
+
+# Build D2L Website
+./.github/workflow_scripts/build_html.sh "$TARGET_BRANCH" "$JOB_NAME"
+
+# Build PDFs
+d2lbook build pdf
+d2lbook build pdf --tab mxnet
+
+# Check if the JOB_NAME is either "$REPO_NAME/release" or "$REPO_NAME/classic"
+if [[ "$JOB_NAME" == "$REPO_NAME/release" || "$JOB_NAME" == "$REPO_NAME/classic" ]]; then
+
+    # Setup D2L Bot
+    source $(dirname "$0")/setup_git.sh
+    setup_git
+
+    # Run d2lbook release deployment
+    if [[ "$JOB_NAME" == *"/classic" ]]; then
+        # Use classic s3 bucket for classic release
+        LANG="classic"
+    fi
+    d2lbook build pkg
+    d2lbook deploy html pdf pkg colab sagemaker slides --s3 "s3://${LANG}.d2l.ai/"
+
+else
+    # Run d2lbook preview deployment
+    d2lbook deploy html pdf --s3 "s3://preview.d2l.ai/${JOB_NAME}/"
+fi
+
+# Store the updated build cache back to S3
+aws s3 sync _build s3://preview.d2l.ai/ci_cache/"$REPO_NAME"-"$TARGET_BRANCH"/_build --acl public-read --quiet --exclude 'eval*/data/*'
diff --git a/static/build_html.sh b/.github/workflow_scripts/build_html.sh
similarity index 100%
rename from static/build_html.sh
rename to .github/workflow_scripts/build_html.sh
diff --git a/.github/workflow_scripts/build_jax.sh b/.github/workflow_scripts/build_jax.sh
new file mode 100644
index 0000000000..e4296bbc88
--- /dev/null
+++ b/.github/workflow_scripts/build_jax.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+set -ex
+
+# Used to capture the exit status of the `d2lbook build eval` command
+ss=0
+
+REPO_NAME="$1"      # E.g. 'd2l-en'
+TARGET_BRANCH="$2"  # E.g. 'master'; also the base branch when building a PR
+
+pip3 install .
+mkdir -p _build
+
+# Run sanity checks on the notebooks (outputs and tabs)
+d2lbook build outputcheck tabcheck
+
+# Restore the JAX build cache from S3
+echo "Retrieving jax build cache"
+aws s3 sync s3://preview.d2l.ai/ci_cache/"$REPO_NAME"-"$TARGET_BRANCH"/_build/eval_jax/ _build/eval_jax/ --delete --quiet --exclude 'data/*'
+
+export XLA_PYTHON_CLIENT_MEM_FRACTION=.70
+export TF_CPP_MIN_LOG_LEVEL=3
+export TF_FORCE_GPU_ALLOW_GROWTH=true
+# Continue the script even if some notebooks fail, so that the cache
+# for the successful notebooks is still copied to S3
+d2lbook build eval --tab jax || ((ss=1))
+
+# Store the updated build cache back to S3
+echo "Upload jax build cache to s3"
+aws s3 sync _build s3://preview.d2l.ai/ci_cache/"$REPO_NAME"-"$TARGET_BRANCH"/_build --acl public-read --quiet
+
+# Exit with a non-zero status if evaluation failed
+if [ "$ss" -ne 0 ]; then
+    exit 1
+fi
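All four framework build scripts in this patch share the same cache-then-fail shape. A minimal sketch of the pattern, with `CACHE_URI` as a placeholder rather than a variable the real scripts use:

```bash
# Sketch: record a build failure without aborting, so the cache for the
# notebooks that did succeed is still pushed back to S3 before exiting.
ss=0
aws s3 sync "$CACHE_URI" _build/eval/ --delete --quiet   # restore cache
d2lbook build eval || ((ss=1))                           # remember failure, keep going
aws s3 sync _build "$CACHE_URI" --quiet                  # store cache either way
if [ "$ss" -ne 0 ]; then exit 1; fi                      # then propagate the failure
```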
diff --git a/.github/workflow_scripts/build_mxnet.sh b/.github/workflow_scripts/build_mxnet.sh
new file mode 100644
index 0000000000..2f5c3f47cb
--- /dev/null
+++ b/.github/workflow_scripts/build_mxnet.sh
@@ -0,0 +1,38 @@
+#!/bin/bash
+
+set -ex
+
+# Used to capture the exit status of the `d2lbook build eval` command
+ss=0
+
+REPO_NAME="$1"      # E.g. 'd2l-en'
+TARGET_BRANCH="$2"  # E.g. 'master'; also the base branch when building a PR
+
+pip3 install .
+mkdir -p _build
+
+# Run sanity checks on the notebooks (outputs and tabs)
+d2lbook build outputcheck tabcheck
+
+# Restore the MXNet build cache from S3
+echo "Retrieving mxnet build cache"
+aws s3 sync s3://preview.d2l.ai/ci_cache/"$REPO_NAME"-"$TARGET_BRANCH"/_build/eval_mxnet/ _build/eval_mxnet/ --delete --quiet --exclude 'data/*'
+
+# MXNet training for the following notebooks is slow in the container;
+# setting MXNET_CPU_WORKER_NTHREADS=4 below seems to fix the issue:
+# 1. chapter_multilayer-perceptrons/dropout.md
+# 2. chapter_multilayer-perceptrons/mlp-implementation.md
+# 3. chapter_linear-classification/softmax-regression-concise.md
+# 4. chapter_linear-classification/softmax-regression-scratch.md
+export MXNET_CPU_WORKER_NTHREADS=4
+# Continue the script even if some notebooks fail, so that the cache
+# for the successful notebooks is still copied to S3
+d2lbook build eval --tab mxnet || ((ss=1))
+
+# Store the updated build cache back to S3
+echo "Upload mxnet build cache to s3"
+aws s3 sync _build s3://preview.d2l.ai/ci_cache/"$REPO_NAME"-"$TARGET_BRANCH"/_build --acl public-read --quiet
+
+# Exit with a non-zero status if evaluation failed
+if [ "$ss" -ne 0 ]; then
+    exit 1
+fi
diff --git a/.github/workflow_scripts/build_pytorch.sh b/.github/workflow_scripts/build_pytorch.sh
new file mode 100644
index 0000000000..398231e87a
--- /dev/null
+++ b/.github/workflow_scripts/build_pytorch.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+set -ex
+
+# Used to capture the exit status of the `d2lbook build eval` command
+ss=0
+
+REPO_NAME="$1"      # E.g. 'd2l-en'
+TARGET_BRANCH="$2"  # E.g. 'master'; also the base branch when building a PR
+
+pip3 install .
+mkdir -p _build
+
+# Run sanity checks on the notebooks (outputs and tabs)
+d2lbook build outputcheck tabcheck
+
+# Restore the PyTorch build and slides caches from S3
+echo "Retrieving pytorch build cache"
+aws s3 sync s3://preview.d2l.ai/ci_cache/"$REPO_NAME"-"$TARGET_BRANCH"/_build/eval _build/eval --delete --quiet --exclude 'data/*'
+echo "Retrieving pytorch slides cache"
+aws s3 sync s3://preview.d2l.ai/ci_cache/"$REPO_NAME"-"$TARGET_BRANCH"/_build/slides _build/slides --delete --quiet --exclude 'data/*'
+
+# Continue the script even if some notebooks fail, so that the cache
+# for the successful notebooks is still copied to S3
+d2lbook build eval || ((ss=1))
+d2lbook build slides --tab pytorch
+
+# Store the updated build cache back to S3
+echo "Upload pytorch build cache to s3"
+aws s3 sync _build s3://preview.d2l.ai/ci_cache/"$REPO_NAME"-"$TARGET_BRANCH"/_build --acl public-read --quiet
+
+# Exit with a non-zero status if evaluation failed
+if [ "$ss" -ne 0 ]; then
+    exit 1
+fi
diff --git a/.github/workflow_scripts/build_tf.sh b/.github/workflow_scripts/build_tf.sh
new file mode 100644
index 0000000000..aadc8c1e87
--- /dev/null
+++ b/.github/workflow_scripts/build_tf.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+
+set -ex
+
+# Used to capture the exit status of the `d2lbook build eval` command
+ss=0
+
+REPO_NAME="$1"      # E.g. 'd2l-en'
+TARGET_BRANCH="$2"  # E.g. 'master'; also the base branch when building a PR
+
+pip3 install .
+mkdir -p _build
+
+# Run sanity checks on the notebooks (outputs and tabs)
+d2lbook build outputcheck tabcheck
+
+# Restore the TensorFlow build cache from S3
+echo "Retrieving tensorflow build cache"
+aws s3 sync s3://preview.d2l.ai/ci_cache/"$REPO_NAME"-"$TARGET_BRANCH"/_build/eval_tensorflow/ _build/eval_tensorflow/ --delete --quiet --exclude 'data/*'
+
+export TF_CPP_MIN_LOG_LEVEL=3
+export TF_FORCE_GPU_ALLOW_GROWTH=true
+# Continue the script even if some notebooks fail, so that the cache
+# for the successful notebooks is still copied to S3
+d2lbook build eval --tab tensorflow || ((ss=1))
+
+# Store the updated build cache back to S3
+echo "Upload tensorflow build cache to s3"
+aws s3 sync _build s3://preview.d2l.ai/ci_cache/"$REPO_NAME"-"$TARGET_BRANCH"/_build --acl public-read --quiet
+
+# Exit with a non-zero status if evaluation failed
+if [ "$ss" -ne 0 ]; then
+    exit 1
+fi
diff --git a/.github/workflow_scripts/setup_git.sh b/.github/workflow_scripts/setup_git.sh
new file mode 100644
index 0000000000..ba80719b07
--- /dev/null
+++ b/.github/workflow_scripts/setup_git.sh
@@ -0,0 +1,18 @@
+function setup_git {
+    # Turn off logging
+    set +x
+    mkdir -p $HOME/.ssh
+    echo "yes" | ssh-keyscan -t rsa github.com >> $HOME/.ssh/known_hosts
+
+    # Retrieve the SSH key securely from AWS Secrets Manager
+    GIT_SSH_KEY=$(aws secretsmanager get-secret-value --secret-id d2l_bot_github --query SecretString --output text --region us-west-2)
+
+    # Write the SSH key to a file
+    echo "$GIT_SSH_KEY" > $HOME/.ssh/id_rsa
+    chmod 600 $HOME/.ssh/id_rsa
+
+    git config --global user.name "d2l-bot"
+    git config --global user.email "100248899+d2l-bot@users.noreply.github.com"
+
+    echo "Successfully Configured Bot"
+}
diff --git a/.github/workflows/build-docker.yml b/.github/workflows/build-docker.yml
new file mode 100644
index 0000000000..9f5b8dc476
--- /dev/null
+++ b/.github/workflows/build-docker.yml
@@ -0,0 +1,87 @@
+name: Build Docker Image
+
+on:
+  workflow_dispatch:
+    inputs:
+      image_torch:
+        type: boolean
+        description: Build PyTorch Image
+      image_tf:
+        type: boolean
+        description: Build TensorFlow Image
+      image_jax:
+        type: boolean
+        description: Build JAX Image
+      image_mxnet:
+        type: boolean
+        description: Build MXNet Image
+      image_builder:
+        type: boolean
+        description: Build D2L Builder Image
+
+permissions:
+  id-token: write
+  contents: read
+
+jobs:
+  build_docker_image:
+    name: Build D2L Docker Images
+    runs-on: ubuntu-latest
+    defaults:
+      run:
+        working-directory: ./ci/docker
+    steps:
+      - uses: actions/checkout@v2
+      - name: Configure AWS Credentials
+        uses: aws-actions/configure-aws-credentials@v2
+        with:
+          role-to-assume: arn:aws:iam::650140442593:role/D2L_CI_Docker
+          role-duration-seconds: 3600
+          aws-region: us-west-2
+
+      - name: Build D2L PyTorch Image
+        if: github.event.inputs.image_torch == 'true'
+        run: |
+          chmod +x ./login_ecr.sh; ./login_ecr.sh
+          docker build -f Dockerfile.d2l-torch -t d2l-containers:d2l-torch-latest .
+          docker tag d2l-containers:d2l-torch-latest 650140442593.dkr.ecr.us-west-2.amazonaws.com/d2l-containers:d2l-torch-latest
+          docker push 650140442593.dkr.ecr.us-west-2.amazonaws.com/d2l-containers:d2l-torch-latest
+          # Clean up to reclaim space
+          echo "y" | docker system prune -a
+
+      - name: Build D2L TensorFlow Image
+        if: github.event.inputs.image_tf == 'true'
+        run: |
+          chmod +x ./login_ecr.sh; ./login_ecr.sh
+          docker build -f Dockerfile.d2l-tf -t d2l-containers:d2l-tensorflow-latest .
+          docker tag d2l-containers:d2l-tensorflow-latest 650140442593.dkr.ecr.us-west-2.amazonaws.com/d2l-containers:d2l-tensorflow-latest
+          docker push 650140442593.dkr.ecr.us-west-2.amazonaws.com/d2l-containers:d2l-tensorflow-latest
+          # Clean up to reclaim space
+          echo "y" | docker system prune -a
+
+      - name: Build D2L JAX Image
+        if: github.event.inputs.image_jax == 'true'
+        run: |
+          chmod +x ./login_ecr.sh; ./login_ecr.sh
+          echo ${{ secrets.NVCR_JAX_DOCKER_PASSWORD }} | docker login -u "\$oauthtoken" --password-stdin nvcr.io
+          docker build -f Dockerfile.d2l-jax -t d2l-containers:d2l-jax-latest .
+          docker tag d2l-containers:d2l-jax-latest 650140442593.dkr.ecr.us-west-2.amazonaws.com/d2l-containers:d2l-jax-latest
+          docker push 650140442593.dkr.ecr.us-west-2.amazonaws.com/d2l-containers:d2l-jax-latest
+          # Clean up to reclaim space
+          echo "y" | docker system prune -a
+
+      - name: Build D2L MXNet Image
+        if: github.event.inputs.image_mxnet == 'true'
+        run: |
+          chmod +x ./login_ecr.sh; ./login_ecr.sh
+          docker build -f Dockerfile.d2l-mxnet -t d2l-containers:d2l-mxnet-latest .
+          docker tag d2l-containers:d2l-mxnet-latest 650140442593.dkr.ecr.us-west-2.amazonaws.com/d2l-containers:d2l-mxnet-latest
+          docker push 650140442593.dkr.ecr.us-west-2.amazonaws.com/d2l-containers:d2l-mxnet-latest
+
+      - name: Build D2L CPU Builder Image
+        if: github.event.inputs.image_builder == 'true'
+        run: |
+          chmod +x ./login_ecr.sh; ./login_ecr.sh
+          docker build -f Dockerfile.d2l-builder -t d2l-containers:d2l-builder-latest .
+          docker tag d2l-containers:d2l-builder-latest 650140442593.dkr.ecr.us-west-2.amazonaws.com/d2l-containers:d2l-builder-latest
+          docker push 650140442593.dkr.ecr.us-west-2.amazonaws.com/d2l-containers:d2l-builder-latest
"!contains(github.event.head_commit.message, '[skip jax]') && !contains(github.event.head_commit.message, '[skip frameworks]')" + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v2 + - name: Setup Env Vars + uses: ./.github/actions/setup_env_vars + - name: Evaluate JAX on AWS Batch + uses: ./.github/actions/submit-job + with: + job-type: ci-gpu-jax + job-name: D2L-Build-JAX + command: chmod +x ./.github/workflow_scripts/build_jax.sh && ./.github/workflow_scripts/build_jax.sh '${{ env.REPO_NAME }}' '${{ env.TARGET_BRANCH }}' + + build_mxnet: + name: Build MXNet + if: "!contains(github.event.head_commit.message, '[skip mxnet]') && !contains(github.event.head_commit.message, '[skip frameworks]')" + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v2 + - name: Setup Env Vars + uses: ./.github/actions/setup_env_vars + - name: Evaluate MXNet on AWS Batch + uses: ./.github/actions/submit-job + with: + job-type: ci-gpu-mxnet + job-name: D2L-Build-MXNet + command: chmod +x ./.github/workflow_scripts/build_mxnet.sh && ./.github/workflow_scripts/build_mxnet.sh '${{ env.REPO_NAME }}' '${{ env.TARGET_BRANCH }}' + + build_and_deploy: + name: Build Website/PDF & Publish Preview/Release + needs: [build_torch, build_tf, build_jax, build_mxnet] + if: | + always() && + !contains(github.event.head_commit.message, '[skip builder]') && + (needs.build_torch.result == 'success' || needs.build_torch.result == 'skipped') && + (needs.build_tf.result == 'success' || needs.build_tf.result == 'skipped') && + (needs.build_jax.result == 'success' || needs.build_jax.result == 'skipped') && + (needs.build_mxnet.result == 'success' || needs.build_mxnet.result == 'skipped') + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v2 + - name: Setup Env Vars + uses: ./.github/actions/setup_env_vars + - name: Build Website & PDFs on AWS Batch + uses: ./.github/actions/submit-job + with: + job-type: ci-cpu + job-name: D2L-Builder + command: chmod +x ./.github/workflow_scripts/build_and_deploy.sh ./.github/workflow_scripts/build_html.sh && ./.github/workflow_scripts/build_and_deploy.sh '${{ env.REPO_NAME }}' '${{ env.TARGET_BRANCH }}' '${{ env.JOB_NAME }}' '${{ env.LANG }}' + - name: Comment on PR + if: ${{ github.event_name == 'pull_request_target' }} + uses: peter-evans/create-or-update-comment@v3.0.0 + with: + issue-number: ${{ github.event.number }} + body: | + Job PR-${{ github.event.number }}-${{ env.SHORT_SHA }} is done. + Check the results at http://preview.d2l.ai/${{ env.JOB_NAME }} diff --git a/Jenkinsfile b/Jenkinsfile deleted file mode 100644 index 6a9713187e..0000000000 --- a/Jenkinsfile +++ /dev/null @@ -1,102 +0,0 @@ -stage("Build and Publish") { - // such as d2l-en and d2l-zh - def REPO_NAME = env.JOB_NAME.split('/')[0] - // such as en and zh - def LANG = REPO_NAME.split('-')[1] - // The current branch or the branch this PR will merge into - def TARGET_BRANCH = env.CHANGE_TARGET ? 
diff --git a/Jenkinsfile b/Jenkinsfile
deleted file mode 100644
index 6a9713187e..0000000000
--- a/Jenkinsfile
+++ /dev/null
@@ -1,102 +0,0 @@
-stage("Build and Publish") {
-  // such as d2l-en and d2l-zh
-  def REPO_NAME = env.JOB_NAME.split('/')[0]
-  // such as en and zh
-  def LANG = REPO_NAME.split('-')[1]
-  // The current branch or the branch this PR will merge into
-  def TARGET_BRANCH = env.CHANGE_TARGET ? env.CHANGE_TARGET : env.BRANCH_NAME
-  // such as d2l-en-master
-  def TASK = REPO_NAME + '-' + TARGET_BRANCH
-  node('d2l-worker') {
-    ws("workspace/${TASK}") {
-      checkout scm
-      // conda environment
-      def ENV_NAME = "${TASK}-${EXECUTOR_NUMBER}";
-
-      sh label: "Build Environment", script: """set -ex
-      conda env update -n ${ENV_NAME} -f static/build.yml
-      conda activate ${ENV_NAME}
-      pip install git+https://github.com/d2l-ai/d2l-book
-      pip list
-      nvidia-smi
-      """
-
-      sh label: "Sanity Check", script: """set -ex
-      conda activate ${ENV_NAME}
-      d2lbook build outputcheck tabcheck
-      """
-
-      sh label: "Execute Notebooks", script: """set -ex
-      conda activate ${ENV_NAME}
-      ./static/cache.sh restore _build/eval/data
-      d2lbook build eval
-      d2lbook build slides --tab pytorch
-      ./static/cache.sh store _build/eval/data
-      """
-
-      sh label: "Execute Notebooks [Jax]", script: """set -ex
-      conda activate ${ENV_NAME}
-      ./static/cache.sh restore _build/eval_jax/data
-      export XLA_PYTHON_CLIENT_MEM_FRACTION=.70
-      export TF_CPP_MIN_LOG_LEVEL=3
-      export TF_FORCE_GPU_ALLOW_GROWTH=true
-      d2lbook build eval --tab jax
-      ./static/cache.sh store _build/eval_jax/data
-      """
-
-      sh label: "Execute Notebooks [TensorFlow]", script: """set -ex
-      conda activate ${ENV_NAME}
-      ./static/cache.sh restore _build/eval_tensorflow/data
-      export TF_CPP_MIN_LOG_LEVEL=3
-      export TF_FORCE_GPU_ALLOW_GROWTH=true
-      d2lbook build eval --tab tensorflow
-      ./static/cache.sh store _build/eval_tensorflow/data
-      """
-
-      sh label: "Execute Notebooks [MXNet]", script: """set -ex
-      conda activate ${ENV_NAME}
-      ./static/cache.sh restore _build/eval_mxnet/data
-      d2lbook build eval --tab mxnet
-      ./static/cache.sh store _build/eval_mxnet/data
-      """
-
-      sh label:"Build HTML", script:"""set -ex
-      conda activate ${ENV_NAME}
-      ./static/build_html.sh ${env.BRANCH_NAME} ${JOB_NAME}
-      """
-
-      sh label:"Build PDF", script:"""set -ex
-      conda activate ${ENV_NAME}
-      d2lbook build pdf
-      """
-
-      sh label:"Build MXNet PDF", script:"""set -ex
-      conda activate ${ENV_NAME}
-      d2lbook build pdf --tab mxnet
-      """
-
-      if (env.BRANCH_NAME == 'release') {
-        sh label:"Release", script:"""set -ex
-        conda activate ${ENV_NAME}
-        d2lbook build pkg
-        d2lbook deploy html pdf pkg colab sagemaker slides --s3 s3://${LANG}.d2l.ai/
-        """
-
-        // Publish d2l pypi package
-        // sh label:"Release d2l", script:"""set -ex
-        // conda activate ${ENV_NAME}
-        // pip install setuptools wheel twine
-        // python setup.py bdist_wheel
-        // """
-      } else {
-        sh label:"Publish", script:"""set -ex
-        conda activate ${ENV_NAME}
-        d2lbook deploy html pdf --s3 s3://preview.d2l.ai/${JOB_NAME}/
-        """
-        if (env.BRANCH_NAME.startsWith("PR-")) {
-          pullRequest.comment("Job ${JOB_NAME}/${BUILD_NUMBER} is complete. \nCheck the results at http://preview.d2l.ai/${JOB_NAME}/")
-        }
-      }
-    }
-  }
-}
diff --git a/chapter_appendix-mathematics-for-deep-learning/eigendecomposition.md b/chapter_appendix-mathematics-for-deep-learning/eigendecomposition.md
index caf8e5d148..56db84160a 100644
--- a/chapter_appendix-mathematics-for-deep-learning/eigendecomposition.md
+++ b/chapter_appendix-mathematics-for-deep-learning/eigendecomposition.md
@@ -105,8 +105,7 @@ from d2l import torch as d2l
 from IPython import display
 import torch
 
-torch.eig(torch.tensor([[2, 1], [2, 3]], dtype=torch.float64),
-          eigenvectors=True)
+torch.linalg.eig(torch.tensor([[2, 1], [2, 3]], dtype=torch.float64))
 ```
 
 ```{.python .input}
@@ -295,7 +294,7 @@ A = torch.tensor([[1.0, 0.1, 0.1, 0.1],
               [0.1, 0.2, 5.0, 0.5],
               [0.1, 0.3, 0.5, 9.0]])
 
-v, _ = torch.eig(A)
+v, _ = torch.linalg.eig(A)
 v
 ```
@@ -497,7 +496,7 @@ print(f'norms of eigenvalues: {norm_eigs}')
 ```{.python .input}
 #@tab pytorch
 # Compute the eigenvalues
-eigs = torch.eig(A)[0][:,0].tolist()
+eigs = torch.linalg.eig(A).eigenvalues.tolist()
 norm_eigs = [torch.abs(torch.tensor(x)) for x in eigs]
 norm_eigs.sort()
 print(f'norms of eigenvalues: {norm_eigs}')
diff --git a/chapter_optimization/gd.md b/chapter_optimization/gd.md
index e39ecc841e..3e75ba5224 100644
--- a/chapter_optimization/gd.md
+++ b/chapter_optimization/gd.md
@@ -162,7 +162,10 @@ def train_2d(trainer, steps=20, f_grad=None):  #@save
         results.append((x1, x2))
     print(f'epoch {i + 1}, x1: {float(x1):f}, x2: {float(x2):f}')
     return results
+```
+
+```{.python .input}
+#@tab mxnet, tensorflow
 def show_trace_2d(f, results):  #@save
     """Show the trace of 2D variables during optimization."""
     d2l.set_figsize()
@@ -174,6 +177,19 @@ def show_trace_2d(f, results):  #@save
     d2l.plt.xlabel('x1')
     d2l.plt.ylabel('x2')
 ```
 
+```{.python .input}
+#@tab pytorch
+def show_trace_2d(f, results):  #@save
+    """Show the trace of 2D variables during optimization."""
+    d2l.set_figsize()
+    d2l.plt.plot(*zip(*results), '-o', color='#ff7f0e')
+    x1, x2 = d2l.meshgrid(d2l.arange(-5.5, 1.0, 0.1),
+                          d2l.arange(-3.0, 1.0, 0.1), indexing='ij')
+    d2l.plt.contour(x1, x2, f(x1, x2), colors='#1f77b4')
+    d2l.plt.xlabel('x1')
+    d2l.plt.ylabel('x2')
+```
+
 Next, we observe the trajectory of the optimization variable $\mathbf{x}$ for learning rate $\eta = 0.1$. We can see that after 20 steps the value of $\mathbf{x}$ approaches its minimum at $[0, 0]$. Progress is fairly well-behaved albeit rather slow.
 
 ```{.python .input}
diff --git a/chapter_preliminaries/pandas.md b/chapter_preliminaries/pandas.md
index 5cec10c683..c24713c330 100644
--- a/chapter_preliminaries/pandas.md
+++ b/chapter_preliminaries/pandas.md
@@ -123,7 +123,7 @@ we can load them into a tensor**] (recall :numref:`sec_ndarray`).
 %%tab mxnet
 from mxnet import np
 
-X, y = np.array(inputs.values), np.array(targets.values)
+X, y = np.array(inputs.to_numpy(dtype=float)), np.array(targets.to_numpy(dtype=float))
 X, y
 ```
 
@@ -131,7 +131,8 @@ X, y
 %%tab pytorch
 import torch
 
-X, y = torch.tensor(inputs.values), torch.tensor(targets.values)
+X = torch.tensor(inputs.to_numpy(dtype=float))
+y = torch.tensor(targets.to_numpy(dtype=float))
 X, y
 ```
 
@@ -139,7 +140,8 @@ X, y
 %%tab tensorflow
 import tensorflow as tf
 
-X, y = tf.constant(inputs.values), tf.constant(targets.values)
+X = tf.constant(inputs.to_numpy(dtype=float))
+y = tf.constant(targets.to_numpy(dtype=float))
 X, y
 ```
 
@@ -147,7 +149,8 @@ X, y
 %%tab jax
 from jax import numpy as jnp
 
-X, y = jnp.array(inputs.values), jnp.array(targets.values)
+X = jnp.array(inputs.to_numpy(dtype=float))
+y = jnp.array(targets.to_numpy(dtype=float))
 X, y
 ```
diff --git a/chapter_recommender-systems/movielens.md b/chapter_recommender-systems/movielens.md
index c76e94ee3d..95d3e0fa8f 100644
--- a/chapter_recommender-systems/movielens.md
+++ b/chapter_recommender-systems/movielens.md
@@ -31,8 +31,8 @@ d2l.DATA_HUB['ml-100k'] = (
 def read_data_ml100k():
     data_dir = d2l.download_extract('ml-100k')
     names = ['user_id', 'item_id', 'rating', 'timestamp']
-    data = pd.read_csv(os.path.join(data_dir, 'u.data'), '\t', names=names,
-                       engine='python')
+    data = pd.read_csv(os.path.join(data_dir, 'u.data'), sep='\t',
+                       names=names, engine='python')
     num_users = data.user_id.unique().shape[0]
     num_items = data.item_id.unique().shape[0]
     return data, num_users, num_items
diff --git a/ci/docker/Dockerfile.d2l-builder b/ci/docker/Dockerfile.d2l-builder
new file mode 100644
index 0000000000..9446a8ccb8
--- /dev/null
+++ b/ci/docker/Dockerfile.d2l-builder
@@ -0,0 +1,39 @@
+FROM ubuntu:latest
+
+RUN adduser --disabled-password --disabled-login ci
+WORKDIR /home/ci
+
+# Add d2l_job script
+ADD d2l_job.sh .
+RUN chmod +x d2l_job.sh; chown ci d2l_job.sh
+
+# Copy git timesync for caching
+ADD git-timesync /home/ci/.local/bin/
+RUN chmod +x /home/ci/.local/bin/git-timesync
+
+RUN apt-get update && apt-get -y install build-essential git wget
+
+# Install pdf dependencies
+RUN DEBIAN_FRONTEND=noninteractive apt-get install -y texlive-full
+RUN apt-get install -y librsvg2-bin xindy pandoc
+
+# Install fonts
+RUN wget https://raw.githubusercontent.com/d2l-ai/utils/master/install_fonts.sh
+# Remove "sudo " from the fonts script
+RUN sed -i 's/sudo //g' install_fonts.sh
+RUN chmod +x install_fonts.sh; ./install_fonts.sh
+
+RUN apt-get install -y python3 python3-pip python-is-python3
+
+# Allow permissions for pip installations and git-timesync
+RUN chown -R ci:ci /home/ci/.local
+
+USER ci
+
+ENV PATH="/home/ci/.local/bin:$PATH"
+
+# Install d2lbook using pip
+# Install all libraries (cpu) to make sure API reference works for PDF builds
+RUN pip3 install git+https://github.com/d2l-ai/d2l-book torch tensorflow jax mxnet
+
+CMD ["/bin/bash"]
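To build or fetch one of these images locally, the same commands the `build-docker` workflow runs can be reused. A sketch, assuming AWS credentials with access to the ECR registry above:

```bash
# Build the builder image locally, mirroring the workflow steps
cd ci/docker
./login_ecr.sh   # authenticate docker against the ECR registry
docker build -f Dockerfile.d2l-builder -t d2l-containers:d2l-builder-latest .

# Or pull a prebuilt framework image instead of building it
docker pull 650140442593.dkr.ecr.us-west-2.amazonaws.com/d2l-containers:d2l-torch-latest
```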
diff --git a/ci/docker/Dockerfile.d2l-jax b/ci/docker/Dockerfile.d2l-jax
new file mode 100644
index 0000000000..294c6f3080
--- /dev/null
+++ b/ci/docker/Dockerfile.d2l-jax
@@ -0,0 +1,25 @@
+# Use JAX & Flax (March 2023)
+FROM nvcr.io/nvdlfwea/jax/jax:23.03-py3
+
+RUN adduser --disabled-password --disabled-login ci
+WORKDIR /home/ci
+
+# Copy d2l_job script
+ADD d2l_job.sh .
+RUN chmod +x d2l_job.sh; chown ci d2l_job.sh
+
+# Copy git timesync for caching
+ADD git-timesync /home/ci/.local/bin/
+RUN chmod +x /home/ci/.local/bin/git-timesync
+
+# Allow permissions for pip installations and git-timesync
+RUN chown -R ci:ci /home/ci/.local
+
+USER ci
+
+ENV PATH="/home/ci/.local/bin:$PATH"
+
+# Install d2lbook using pip + dependencies for jax d2l
+RUN pip3 install git+https://github.com/d2l-ai/d2l-book tensorflow-datasets==4.8.3
+
+CMD ["/bin/bash"]
diff --git a/ci/docker/Dockerfile.d2l-mxnet b/ci/docker/Dockerfile.d2l-mxnet
new file mode 100644
index 0000000000..38b4d88630
--- /dev/null
+++ b/ci/docker/Dockerfile.d2l-mxnet
@@ -0,0 +1,28 @@
+# Use MXNet 1.9.1 (March 2023)
+FROM nvcr.io/nvidia/mxnet:23.03-py3
+
+RUN adduser --disabled-password --disabled-login ci
+WORKDIR /home/ci
+
+# Copy d2l_job script
+ADD d2l_job.sh .
+RUN chmod +x d2l_job.sh; chown ci d2l_job.sh
+
+# Copy git timesync for caching
+ADD git-timesync /home/ci/.local/bin/
+RUN chmod +x /home/ci/.local/bin/git-timesync
+
+# Allow permissions for pip installations and git-timesync
+RUN chown -R ci:ci /home/ci/.local
+
+# Allow write permissions for downloading data to /opt/mxnet/data
+RUN chown -R ci:ci /opt
+
+USER ci
+
+ENV PATH="/home/ci/.local/bin:$PATH"
+
+# Install d2lbook using pip
+RUN pip3 install git+https://github.com/d2l-ai/d2l-book
+
+CMD ["/bin/bash"]
diff --git a/ci/docker/Dockerfile.d2l-tf b/ci/docker/Dockerfile.d2l-tf
new file mode 100644
index 0000000000..761b73e810
--- /dev/null
+++ b/ci/docker/Dockerfile.d2l-tf
@@ -0,0 +1,25 @@
+# Use TensorFlow 2.11 (March 2023)
+FROM nvcr.io/nvidia/tensorflow:23.03-tf2-py3
+
+RUN adduser --disabled-password --disabled-login ci
+WORKDIR /home/ci
+
+# Copy d2l_job script
+ADD d2l_job.sh .
+RUN chmod +x d2l_job.sh; chown ci d2l_job.sh
+
+# Copy git timesync for caching
+ADD git-timesync /home/ci/.local/bin/
+RUN chmod +x /home/ci/.local/bin/git-timesync
+
+# Allow permissions for pip installations and git-timesync
+RUN chown -R ci:ci /home/ci/.local
+
+USER ci
+
+ENV PATH="/home/ci/.local/bin:$PATH"
+
+# Install d2lbook using pip + dependencies for tensorflow d2l
+RUN pip3 install git+https://github.com/d2l-ai/d2l-book tensorflow-probability==0.19.0
+
+CMD ["/bin/bash"]
diff --git a/ci/docker/Dockerfile.d2l-torch b/ci/docker/Dockerfile.d2l-torch
new file mode 100644
index 0000000000..5631b258cf
--- /dev/null
+++ b/ci/docker/Dockerfile.d2l-torch
@@ -0,0 +1,25 @@
+# Use PyTorch 2.0 (March 2023)
+FROM nvcr.io/nvidia/pytorch:23.03-py3
+
+RUN adduser --disabled-password --disabled-login ci
+WORKDIR /home/ci
+
+# Copy d2l_job script
+ADD d2l_job.sh .
+RUN chmod +x d2l_job.sh; chown ci d2l_job.sh
+
+# Copy git timesync for caching
+ADD git-timesync /home/ci/.local/bin/
+RUN chmod +x /home/ci/.local/bin/git-timesync
+
+# Allow permissions for pip installations and git-timesync
+RUN chown -R ci:ci /home/ci/.local
+
+USER ci
+
+ENV PATH="/home/ci/.local/bin:$PATH"
+
+# Install d2lbook using pip + dependencies for torch d2l
+RUN pip3 install git+https://github.com/d2l-ai/d2l-book gym==0.21.0 gpytorch scipy syne-tune[gpsearchers]==0.3.2
+
+CMD ["/bin/bash"]
diff --git a/ci/docker/d2l_job.sh b/ci/docker/d2l_job.sh
new file mode 100644
index 0000000000..cfc42130a6
--- /dev/null
+++ b/ci/docker/d2l_job.sh
@@ -0,0 +1,68 @@
+#!/bin/bash
+
+date
+echo "Args: $@"
+env
+echo "jobId: $AWS_BATCH_JOB_ID"
+echo "jobQueue: $AWS_BATCH_JQ_NAME"
+echo "computeEnvironment: $AWS_BATCH_CE_NAME"
+
+SOURCE_REF=$1
+WORK_DIR=$2
+COMMAND=$3
+############### NOT USED ATM ##################
+SAVED_OUTPUT=$4
+SAVE_PATH=$5
+###############################################
+REMOTE=$6
+SAFE_TO_USE_SCRIPT=$7
+ORIGINAL_REPO=${8:-'d2l-en'}
+# TODO @anirudhdagar: hardcode ORIGINAL_ORG
+# Avoid ability to change org by restricting
+# job definition arguments defined in d2l-infra
+# This is only changed for testing purposes
+ORIGINAL_ORG=${9:-'d2l-ai'}
+
+
+# Clone the original repo (default branch) to get its trusted workflow scripts
+git clone https://github.com/"$ORIGINAL_ORG"/"$ORIGINAL_REPO".git
+
+WORKFLOW_SCRIPTS="$ORIGINAL_REPO"/.github/workflow_scripts
+if [ -d "$WORKFLOW_SCRIPTS" ]; then
+    cp -R "$ORIGINAL_REPO"/.github/workflow_scripts .
+fi
+
+cd "$ORIGINAL_REPO"
+
+if [ ! -z "$REMOTE" ]; then
+    git remote set-url origin "$REMOTE"
+fi
+
+git fetch origin "$SOURCE_REF":working
+git checkout working
+
+# Reset modification times for all notebooks using git-timesync
+# We use this to make sure d2lbook build eval caching is valid
+# even after cloning the repo for each run
+# Modification times for original repo files are corrected and are now
+# good for comparing with modification times of build files coming
+# from the S3 bucket
+git timesync *.md **/*.md
+
+# If not safe to use script, we overwrite with the script from master branch
+TRUE=true
+if [[ ${SAFE_TO_USE_SCRIPT,,} != ${TRUE,,} ]]; then
+    if [ -d ../workflow_scripts ]; then
+        rm -rf .github/workflow_scripts
+        mv ../workflow_scripts .github/
+    else
+        echo "Not safe to use the user-provided scripts, and could not find scripts from the master branch"
+        exit 1
+    fi
+fi
+
+cd $WORK_DIR
+/bin/bash -o pipefail -c "eval $COMMAND"
+COMMAND_EXIT_CODE=$?
+
+exit $COMMAND_EXIT_CODE
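For reference, the positional interface above maps to an invocation like the following. The values are hypothetical; in production, AWS Batch fills them in from the job-definition parameters set by `ci/submit-job.py`:

```bash
# Hypothetical manual run of d2l_job.sh; the argument order must match the
# positional parsing above: SOURCE_REF WORK_DIR COMMAND SAVED_OUTPUT
# SAVE_PATH REMOTE SAFE_TO_USE_SCRIPT ORIGINAL_REPO ORIGINAL_ORG
./d2l_job.sh \
    master \
    . \
    "./.github/workflow_scripts/build_pytorch.sh 'd2l-en' 'master'" \
    None \
    batch/temp/example \
    https://github.com/d2l-ai/d2l-en \
    True \
    d2l-en \
    d2l-ai
```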
diff --git a/ci/docker/git-timesync b/ci/docker/git-timesync
new file mode 100644
index 0000000000..2e3a121fe4
--- /dev/null
+++ b/ci/docker/git-timesync
@@ -0,0 +1,288 @@
+#!/bin/sh
+
+# This script has been lifted from https://github.com/tst2005/git-timesync/
+# and all credits for this belong to @tst2005
+
+# shellcheck disable=SC2004,SC3043,SC2155,SC2039
+
+#### About shellcheck
+# I disable:
+# - SC2004 (style): $/${} is unnecessary on arithmetic variables.
+# Because: I prefere `$(( $x - $y ))` than `$(( x - y ))`.
+
+# I disable:
+# - SC2039: In POSIX sh, 'local' is undefined.
+# - SC3043: In POSIX sh, 'local' is undefined.
+# Because:
+# local is too usefull, all modern shell support it.
+# If you really want to run git-timesync on a strict POSIX shell,
+# then remove all local prefixes : 's/local //g'.
+#
+# I disable :
+# - SC2155 (warning): Declare and assign separately to avoid masking return values.
+# Because:
+# It not relevant : the return code is not used.
+# I prefer `local foo="$(bar)"` than `local foo;foo="$(bar)"`.
+####
+
+set -e
+
+####
+# Author: TsT worldmaster.fr
+#
+# Improvements:
+# - dry-run ("-n" flag)
+# - pass files to check as argument
+# - do not time sync modified files
+# - do not time sync untracked files
+# - performance improvment
+# - be able to apply timesync only on files present in the N last commits
+####
+
+####
+# The original version of this script can be found at: https://gist.github.com/jeffery/1115504
+#
+# Helper script to update the Last modified timestamp of files in a Git SCM
+# Projects working Copy
+#
+# When you clone a Git repository, it sets the timestamp of all the files to the
+# time when you cloned the repository.
+#
+# This becomes a problem when you want the cloned repository, which is part of a
+# Web application have a proper cacheing mechanism so that it can re-cache files
+# (into a webtree) that have been modified since the last cache.
+#
+# @see http://stackoverflow.com/questions/1964470/whats-the-equivalent-of-use-commit-times-for-git
+#
+# Author: Jeffery Fernandez
+####
+
+showUsage() {
+    echo 'Usage: git-timesync [-n] [-q] [-v] [--] [<file> ...]'
+    echo 'Usage: git-timesync [-n] [-q] [-v] -'
+    echo
+    echo ' -h, --help              '\
+        'Print this help message'
+    echo ' -n, --dry-run, --dryrun '\
+        'Perform a dry-run to see which files are OK'\
+        'and which ones need to be synchronized'
+    echo ' -q, --quiet             '\
+        'Quiet mode: drop everything that is OK and show only the files'\
+        'which timestamp needs to be synchronized'
+    echo ' -v, --verbose           '\
+        'Verbose mode: show info for each file (opposite of --quiet)'
+    echo ' -1                      '\
+        'Apply timesync on files present in the last commit'
+    echo ' -23                     '\
+        'Apply timesync on files present in the 23 last commits'
+    echo ' -N                      '\
+        'Apply timesync on files present in the N last commits'\
+        '(with 1 <= N <= 9999)'
+}
+
+# Get the last revision hash of a particular file in the git repository
+getFileLastRevision() {
+    git rev-list HEAD -n 1 -- "$1"
+}
+
+#getFileMTimeByRef() {
+#    git show --pretty=format:%at --abbrev-commit "$1" | head -n 1
+#}
+
+getFileMTimeByPath() {
+    # shellcheck disable=SC2155
+    git rev-list --pretty=format:'date %at' --date-order -n 1 HEAD -- "$1" |
+    (
+        local IFS=" ";
+        # shellcheck disable=SC2034
+        while read -r key value _misc; do
+            [ "$key" != "date" ] || echo "$value";
+        done
+    )
+}
+
+# Extract the actual last modified timestamp of the file and Update the timestamp
+updateFileTimeStamp() {
+    # shellcheck disable=SC2155
+
+    # if target does not exists and it's is not a [dead]link, raise an error
+    if [ ! -e "$1" ] && [ ! -h "$1" ]; then
+        if [ -n "$(git ls-files -t -d -- "$1")" ]; then
+            if $verbose; then echo "? $1 (deleted)"; fi
+            return
+        fi
+        echo >&2 "ERROR: Unknown bug ?! No such target $1"
+        return 1
+    fi
+
+    local tracked="$(git ls-files -t -c -- "$1")"
+    if [ -z "$tracked" ]; then
+        if $verbose; then echo "? $1"; fi
+        return
+    fi
+
+    # Extract the last modified timestamp
+    # Get the File last modified time
+    local FILE_MODIFIED_TIME="$(getFileMTimeByPath "$1")"
+    if [ -z "$FILE_MODIFIED_TIME" ]; then
+        echo "?! $1 (not found in git)"
+        return
+    fi
+
+    # Check if the file is modified
+    local uncommited="$(git ls-files -t -dm -- "$1")"
+
+    # for displaying the date in readable format
+    #local FORMATTED_TIMESTAMP="$(date --date="${FILE_MODIFIED_TIME}" +'%d-%m-%Y %H:%M:%S %z')"
+    #local FORMATTED_TIMESTAMP="@${FILE_MODIFIED_TIME}"
+
+    # Modify the last modified timestamp
+    #echo "[$(date -d "$FORMATTED_TIMESTAMP")]: $1"
+    #echo "$FILE_MODIFIED_TIME $1"
+    local current_mtime="$(getmtime "$1")"
+    if $debug; then
+        echo >&2 "DEBUG: $1 (git_time=$FILE_MODIFIED_TIME current_time=$current_mtime delta=$(( ${current_mtime:-0} - ${FILE_MODIFIED_TIME:-0} )))"
+    fi
+    if [ "$current_mtime" = "$FILE_MODIFIED_TIME" ]; then
+        if ${verbose:-true}; then echo "ok $1"; fi
+        return
+    fi
+    if [ -n "$uncommited" ]; then
+        echo "C $1 (modified, not commited, $(( $current_mtime - $FILE_MODIFIED_TIME ))s recent)"
+        return
+    fi
+    if ${dryrun:-true}; then
+        echo "!! $1 (desync: $(( $current_mtime - $FILE_MODIFIED_TIME ))s, no change)"
+        return
+    fi
+    echo "!! $1 (desync: $(( $current_mtime - $FILE_MODIFIED_TIME ))s, syncing...)"
+    #[ -h "$1" ] && touch -c -h -d "$FORMATTED_TIMESTAMP" -- "$1" || \
+    #touch -c -d "$FORMATTED_TIMESTAMP" -- "$1"
+    unixtime_touch -c -h -- "$1"
+}
+
+
+
+# Make sure we are not running this on a bare Repository
+is_not_base_repo() {
+    case "$(git config core.bare)" in
+    false) ;;
+    true)
+        echo "$(pwd): Cannot run this script on a bare Repository"
+        return 1
+        ;;
+    *) echo "$(pwd): Error appended during core.bare detection. Are you really inside a repository ?"
+        return 1
+    esac
+    return 0
+}
+
+updateFileTimeStampInCwd() {
+    is_not_base_repo || return
+
+    git ls-files -z \
+    | tr '\0' '\n' \
+    | (
+        while read -r file; do
+            if [ -z "$(git ls-files -t -d -- "$file")" ]; then
+                updateFileTimeStamp "${file}"
+            fi
+        done
+    )
+}
+
+timesyncThisFile() {
+    if [ -d "$1" ] && [ ! -h "$1" ]; then # is a real directory (not a symlink to a directory)
+        echo "now inside $1"
+        # shellcheck disable=SC2015
+        ( cd -- "$1" && updateFileTimeStampInCwd || true; )
+    else
+        if $need_check_bare; then
+            is_not_base_repo || return 1
+            need_check_bare=false
+        fi
+        updateFileTimeStamp "$1"
+    fi
+}
+
+# ... for Linux ... and MINGW64 (used by Windows GIT Bash)
+linux_unixtime_touch() {
+    # shellcheck disable=SC2155
+    local FORMATTED_TIMESTAMP="@${FILE_MODIFIED_TIME}"
+    touch -d "$FORMATTED_TIMESTAMP" "$@"
+}
+linux_getmtime() {
+    stat -c %Y -- "$1"
+}
+
+# ... for FreeBSD and Mac OS X
+bsd_unixtime_touch() {
+    # shellcheck disable=SC2155
+    local FORMATTED_TIMESTAMP="$(date -j -r "${FILE_MODIFIED_TIME}" +'%Y%m%d%H%M.%S')"
+    touch -t "$FORMATTED_TIMESTAMP" "$@"
+}
+bsd_getmtime() {
+    stat -f %m -- "$1"
+}
+
+################################################################################
+############################## MAIN SCRIPT LOGIC ###############################
+################################################################################
+
+dryrun=false
+verbose=true
+debug=false
+fromrecent=''
+while [ $# -gt 0 ]; do
+    case "$1" in
+    --) shift; break ;;
+    -h|--help) showUsage; exit 0;;
+    -n|--dryrun|--dry-run) dryrun=true ;;
+    -v) verbose=true ;;
+    -q) verbose=false ;;
+    -[1-9]|-[1-9][0-9]|-[1-9][0-9][0-9]|-[1-9][0-9][0-9][0-9]) fromrecent="$1" ;;
+    --debug) debug=true ;;
+    -*) echo >&2 "$0: invalid option $1"; exit 1;;
+    *) break
+    esac
+    shift
+done
+
+# Obtain the Operating System
+case "${GIT_TIMESYNC_FORCE_UNAME:-$(uname)}" in
+    ('Linux'|'MINGW64'*)
+        unixtime_touch() { linux_unixtime_touch "$@"; }
+        getmtime() { linux_getmtime "$@"; }
+    ;;
+    ('Darwin'|'FreeBSD')
+        unixtime_touch() { bsd_unixtime_touch "$@"; }
+        getmtime() { bsd_getmtime "$@"; }
+    ;;
+    (*)
+        echo >&2 "Unknown Operating System to perform timestamp update"
+        exit 1
+    ;;
+esac
+
+if [ $# -eq 0 ] && [ -z "$fromrecent" ]; then
+    # Loop through and fix timestamps on all files in our checked-out repository
+    updateFileTimeStampInCwd
+else
+    need_check_bare=true
+
+    # Loop through and fix timestamps on all specified files
+    if [ -n "$fromrecent" ]; then
+        git log --format='format:' --name-only "$fromrecent" |
+        sort -u |
+        while read -r file; do
+            [ -n "$file" ] || continue
+            [ -e "$file" ] || continue
+            timesyncThisFile "$file"
+        done
+    else
+        for file in "$@"; do
+            timesyncThisFile "$file"
+        done
+    fi
+fi
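Because the Dockerfiles place this script on `$PATH` as `git-timesync`, git picks it up as a subcommand, which is how `d2l_job.sh` calls it. A quick sketch of its use in a clone:

```bash
# Dry run: report tracked files whose mtimes disagree with their git history
git timesync -n *.md
# Reset the mtimes for real (what d2l_job.sh does for all notebooks)
git timesync *.md **/*.md
```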
diff --git a/ci/docker/login_ecr.sh b/ci/docker/login_ecr.sh
new file mode 100644
index 0000000000..f75a13c3f5
--- /dev/null
+++ b/ci/docker/login_ecr.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+aws ecr get-login-password --region us-west-2 | docker login --username AWS --password-stdin 650140442593.dkr.ecr.us-west-2.amazonaws.com
diff --git a/ci/submit-job.py b/ci/submit-job.py
new file mode 100644
index 0000000000..5f32d31f62
--- /dev/null
+++ b/ci/submit-job.py
@@ -0,0 +1,195 @@
+import argparse
+import random
+import re
+import sys
+import time
+from datetime import datetime
+
+import boto3
+from botocore.compat import total_seconds
+from botocore.config import Config
+
+
+job_type_info = {
+    'ci-cpu': {
+        'job_definition': 'd2l-ci-cpu-builder:2',
+        'job_queue': 'D2L-CI-CPU'
+    },
+    'ci-cpu-push': {
+        'job_definition': 'd2l-ci-cpu-builder-push:7',
+        'job_queue': 'D2L-CI-CPU'
+    },
+    'ci-cpu-release': {
+        'job_definition': 'd2l-ci-cpu-builder-release:1',
+        'job_queue': 'D2L-CI-CPU'
+    },
+    'ci-gpu-torch': {
+        'job_definition': 'd2l-ci-gpu-torch:2',
+        'job_queue': 'D2L-CI-GPU'
+    },
+    'ci-gpu-tf': {
+        'job_definition': 'd2l-ci-gpu-tf:2',
+        'job_queue': 'D2L-CI-GPU'
+    },
+    'ci-gpu-jax': {
+        'job_definition': 'd2l-ci-gpu-jax:2',
+        'job_queue': 'D2L-CI-GPU'
+    },
+    'ci-gpu-mxnet': {
+        'job_definition': 'd2l-ci-gpu-mxnet:2',
+        'job_queue': 'D2L-CI-GPU'
+    }
+}
+
+# Create push and release job types for GPUs with the same definitions
+for job_type in list(job_type_info.keys()):
+    if job_type.startswith('ci-gpu'):
+        job_type_info[job_type+'-push'] = job_type_info[job_type]
+        job_type_info[job_type+'-release'] = job_type_info[job_type]
+
+parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+
+parser.add_argument('--profile', help='profile name of aws account.', type=str,
+                    default=None)
+parser.add_argument('--region', help='Default region when creating new connections', type=str,
+                    default='us-west-2')
+parser.add_argument('--name', help='name of the job', type=str, default='d2l-ci')
+parser.add_argument('--job-type', help='type of job to submit.', type=str,
+                    choices=job_type_info.keys(), default='ci-cpu')
+parser.add_argument('--source-ref',
+                    help='ref in d2l-en main github. e.g. master, refs/pull/500/head',
+                    type=str, default='master')
+parser.add_argument('--work-dir',
+                    help='working directory inside the repo. e.g. scripts/preprocess',
+                    type=str, default='.')
+parser.add_argument('--saved-output',
+                    help='output to be saved, relative to working directory. '
+                         'it can be either a single file or a directory',
+                    type=str, default='None')
+parser.add_argument('--save-path',
+                    help='s3 path where files are saved.',
+                    type=str, default='batch/temp/{}'.format(datetime.now().isoformat()))
+parser.add_argument('--command', help='command to run', type=str,
+                    default='git rev-parse HEAD | tee stdout.log')
+parser.add_argument('--remote',
+                    help='git repo address. https://github.com/d2l-ai/d2l-en',
+                    type=str, default="https://github.com/d2l-ai/d2l-en")
+parser.add_argument('--safe-to-use-script',
+                    help='whether script changes from the actor are safe. We assume they '
+                         'are safe if the actor has write permission to our repo',
+                    action='store_true')
+parser.add_argument('--original-repo', help='name of the repo', type=str, default='d2l-en')
+parser.add_argument('--wait', help='block wait until the job completes. '
+                    'Non-zero exit code if job fails.', action='store_true')
+parser.add_argument('--timeout', help='job timeout in seconds', default=7200, type=int)
+
+
+args = parser.parse_args()
+
+session = boto3.Session(profile_name=args.profile, region_name=args.region)
+config = Config(
+    retries=dict(
+        max_attempts=20
+    )
+)
+batch, cloudwatch = [session.client(service_name=sn, config=config) for sn in ['batch', 'logs']]
+
+
+def printLogs(logGroupName, logStreamName, startTime):
+    kwargs = {'logGroupName': logGroupName,
+              'logStreamName': logStreamName,
+              'startTime': startTime,
+              'startFromHead': True}
+
+    lastTimestamp = startTime - 1
+    while True:
+        logEvents = cloudwatch.get_log_events(**kwargs)
+
+        for event in logEvents['events']:
+            lastTimestamp = event['timestamp']
+            timestamp = datetime.utcfromtimestamp(lastTimestamp / 1000.0).isoformat()
+            print('[{}] {}'.format((timestamp + '.000')[:23] + 'Z', event['message']))
+
+        nextToken = logEvents['nextForwardToken']
+        if nextToken and kwargs.get('nextToken') != nextToken:
+            kwargs['nextToken'] = nextToken
+        else:
+            break
+    return lastTimestamp
+
+
+def nowInMillis():
+    endTime = int(total_seconds(datetime.utcnow() - datetime(1970, 1, 1))) * 1000
+    return endTime
+
+
+def main():
+    spin = ['-', '/', '|', '\\', '-', '/', '|', '\\']
+    logGroupName = '/aws/batch/job'
+
+    jobName = re.sub(r'[^A-Za-z0-9_\-]', '', args.name)[:128]  # Enforce AWS Batch jobName rules
+    jobType = args.job_type
+    jobQueue = job_type_info[jobType]['job_queue']
+    jobDefinition = job_type_info[jobType]['job_definition']
+    wait = args.wait
+
+    safe_to_use_script = 'False'
+    if args.safe_to_use_script:
+        safe_to_use_script = 'True'
+
+    parameters = {
+        'SOURCE_REF': args.source_ref,
+        'WORK_DIR': args.work_dir,
+        'SAVED_OUTPUT': args.saved_output,
+        'SAVE_PATH': args.save_path,
+        'COMMAND': f"\"{args.command}\"",  # Wrap the command in double quotes so Batch treats it as a single parameter
+        'REMOTE': args.remote,
+        'SAFE_TO_USE_SCRIPT': safe_to_use_script,
+        'ORIGINAL_REPO': args.original_repo
+    }
+    kwargs = dict(
+        jobName=jobName,
+        jobQueue=jobQueue,
+        jobDefinition=jobDefinition,
+        parameters=parameters,
+    )
+    if args.timeout is not None:
+        kwargs['timeout'] = {'attemptDurationSeconds': args.timeout}
+    submitJobResponse = batch.submit_job(**kwargs)
+
+    jobId = submitJobResponse['jobId']
+    print('Submitted job [{} - {}] to the job queue [{}]'.format(jobName, jobId, jobQueue))
+
+    spinner = 0
+    running = False
+    status_set = set()
+    startTime = 0
+    logStreamName = None
+    while wait:
+        time.sleep(random.randint(5, 10))
+        describeJobsResponse = batch.describe_jobs(jobs=[jobId])
+        status = describeJobsResponse['jobs'][0]['status']
+        if status == 'SUCCEEDED' or status == 'FAILED':
+            if logStreamName:
+                startTime = printLogs(logGroupName, logStreamName, startTime) + 1
+            print('=' * 80)
+            print('Job [{} - {}] {}'.format(jobName, jobId, status))
+            sys.exit(status == 'FAILED')
+
+        elif status == 'RUNNING':
+            logStreamName = describeJobsResponse['jobs'][0]['container']['logStreamName']
+            if not running:
+                running = True
+                print('\rJob [{}, {}] is RUNNING.'.format(jobName, jobId))
+                if logStreamName:
+                    print('Output [{}]:\n {}'.format(logStreamName, '=' * 80))
+            if logStreamName:
+                startTime = printLogs(logGroupName, logStreamName, startTime) + 1
+        elif status not in status_set:
+            status_set.add(status)
+            print('\rJob [%s - %s] is %-9s... %s' % (jobName, jobId, status, spin[spinner % len(spin)]))
+            sys.stdout.flush()
+            spinner += 1
+
+
+if __name__ == '__main__':
+    main()
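Submitting a one-off job by hand looks like the following sketch; it assumes locally configured AWS credentials that can submit to the Batch queues above:

```bash
# Submit the default smoke-test command to the CPU queue and stream
# its CloudWatch logs until the job finishes
python ci/submit-job.py \
    --job-type ci-cpu \
    --name d2l-en-manual-test \
    --source-ref master \
    --remote https://github.com/d2l-ai/d2l-en \
    --command "git rev-parse HEAD | tee stdout.log" \
    --wait
```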
diff --git a/d2l/mxnet.py b/d2l/mxnet.py
index 85a811e950..9e3eb86751 100644
--- a/d2l/mxnet.py
+++ b/d2l/mxnet.py
@@ -2642,8 +2642,8 @@ def update_G(Z, net_D, net_G, loss, trainer_G):
 def read_data_ml100k():
     data_dir = d2l.download_extract('ml-100k')
     names = ['user_id', 'item_id', 'rating', 'timestamp']
-    data = pd.read_csv(os.path.join(data_dir, 'u.data'), '\t', names=names,
-                       engine='python')
+    data = pd.read_csv(os.path.join(data_dir, 'u.data'), sep='\t',
+                       names=names, engine='python')
     num_users = data.user_id.unique().shape[0]
     num_items = data.item_id.unique().shape[0]
     return data, num_users, num_items
diff --git a/d2l/torch.py b/d2l/torch.py
index d8804a7ea4..d26c41a323 100644
--- a/d2l/torch.py
+++ b/d2l/torch.py
@@ -1324,7 +1324,7 @@ def show_trace_2d(f, results):
     d2l.set_figsize()
     d2l.plt.plot(*zip(*results), '-o', color='#ff7f0e')
     x1, x2 = d2l.meshgrid(d2l.arange(-5.5, 1.0, 0.1),
-                          d2l.arange(-3.0, 1.0, 0.1))
+                          d2l.arange(-3.0, 1.0, 0.1), indexing='ij')
     d2l.plt.contour(x1, x2, f(x1, x2), colors='#1f77b4')
     d2l.plt.xlabel('x1')
     d2l.plt.ylabel('x2')
diff --git a/setup.py b/setup.py
index 848a1a8eaa..3a4f5a77de 100644
--- a/setup.py
+++ b/setup.py
@@ -7,10 +7,7 @@
     'matplotlib',
     'matplotlib-inline',
     'requests',
-    'pandas',
-    'gym==0.21.0',
-    'gpytorch',
-    'scipy'
+    'pandas'
 ]
 
 setup(
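The extras dropped from `setup.py` above are not gone: `Dockerfile.d2l-torch` earlier in this patch installs them into the PyTorch CI image instead. A local environment that needs them would do the same (sketch):

```bash
# The heavy, torch-only extras now live in the d2l-torch image rather than
# in the core d2l package requirements
pip3 install gym==0.21.0 gpytorch scipy "syne-tune[gpsearchers]==0.3.2"
```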