Docker improvements, remove CUDA default (#14)
* change default to no cuda

* update Dockerfiles
BramVanroy authored Mar 5, 2024
1 parent 45e3fcf commit 62197dd
Showing 7 changed files with 201 additions and 127 deletions.
65 changes: 65 additions & 0 deletions docker/cpu/Dockerfile
@@ -0,0 +1,65 @@
FROM python:3.11-slim-bookworm

# Metadata as per https://github.com/opencontainers/image-spec/blob/master/annotations.md
LABEL org.opencontainers.image.authors="Bram Vanroy"
LABEL org.opencontainers.image.title="MAchine Translation Evaluation Online - Demo"

# Avoid prompts from apt
ENV DEBIAN_FRONTEND=noninteractive

# Install dependencies in a single RUN command to reduce image layers
RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \
curl \
git \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*

# Create a non-root user
RUN useradd -m -u 1000 mateo_user
USER mateo_user
ENV HOME="/home/mateo_user"

# Environment variables
ENV PORT=7860 \
SERVER="localhost" \
BASE="" \
DEMO_MODE="" \
HF_HUB_ENABLE_HF_TRANSFER=1 \
PATH="${HOME}/.local/bin:${PATH}" \
USE_CUDA=false

WORKDIR ${HOME}/mateo

# Clone the repository
RUN git clone https://github.com/BramVanroy/mateo-demo.git
WORKDIR mateo-demo

# Install Python dependencies with conditional torch installation
RUN python -m pip install --no-cache-dir --upgrade pip wheel setuptools \
&& python -m pip install --no-cache-dir torch==2.2.1+cpu -f https://download.pytorch.org/whl/torch \
&& python -m pip install --no-cache-dir --upgrade .

# Pre-download default models
RUN huggingface-cli download bert-base-multilingual-cased model.safetensors tokenizer.json vocab.txt; \
huggingface-cli download facebook/nllb-200-distilled-600M pytorch_model.bin sentencepiece.bpe.model tokenizer.json; \
python -c "import comet; from comet import download_model; download_model('Unbabel/wmt22-comet-da')"; \
python -c "import evaluate; evaluate.load('bleurt', 'BLEURT-20')"

# Expose the port the app runs on
EXPOSE $PORT

# Healthcheck to ensure the service is running
HEALTHCHECK CMD curl --fail http://$SERVER:$PORT$BASE/_stcore/health || exit 1

# Set the working directory to the Streamlit app
WORKDIR src/mateo_st

# Simplify the CMD script with conditional --use_cuda flag
CMD streamlit run 01_🎈_MATEO.py \
--server.port $PORT \
--server.address $(if [ "$SERVER" = "localhost" ]; then echo "0.0.0.0"; else echo $SERVER; fi) \
$(if [ -n "$BASE" ]; then echo "--server.baseUrlPath $BASE"; fi) \
$(if [ "$DEMO_MODE" = "true" ]; then echo "--server.maxUploadSize 1"; fi) \
-- \
$(if [ "$DEMO_MODE" = "true" ]; then echo "--demo_mode"; fi)
57 changes: 0 additions & 57 deletions docker/default/Dockerfile

This file was deleted.

69 changes: 69 additions & 0 deletions docker/gpu/Dockerfile
@@ -0,0 +1,69 @@
# includes python3.10 (but we use `python-is-python3` to set `python` cmd to `python3` for ease-of-use)
FROM nvidia/cuda:12.1.1-runtime-ubuntu22.04

# Metadata as per https://github.com/opencontainers/image-spec/blob/master/annotations.md
LABEL org.opencontainers.image.authors="Bram Vanroy"
LABEL org.opencontainers.image.title="MAchine Translation Evaluation Online - Demo"

# Avoid prompts from apt
ENV DEBIAN_FRONTEND=noninteractive

# Install dependencies in a single RUN command to reduce image layers
RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \
curl \
git \
python3-pip \
python-is-python3 \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*

# Create a non-root user
RUN useradd -m -u 1000 mateo_user
USER mateo_user
ENV HOME="/home/mateo_user"

# Environment variables
ENV PORT=7860 \
SERVER="localhost" \
BASE="" \
DEMO_MODE="" \
HF_HUB_ENABLE_HF_TRANSFER=1 \
PATH="${HOME}/.local/bin:${PATH}" \
USE_CUDA=true

WORKDIR /mateo

# Clone the repository
RUN git clone https://github.com/BramVanroy/mateo-demo.git
WORKDIR mateo-demo

# Install Python dependencies with conditional torch installation
RUN python -m pip install --no-cache-dir --upgrade pip wheel setuptools \
&& python -m pip install --no-cache-dir torch==2.2.1 --index-url https://download.pytorch.org/whl/cu121 \
&& python -m pip install --no-cache-dir --upgrade .

# Pre-download default models
RUN huggingface-cli download bert-base-multilingual-cased model.safetensors tokenizer.json vocab.txt \
&& huggingface-cli download facebook/nllb-200-distilled-600M pytorch_model.bin sentencepiece.bpe.model tokenizer.json \
&& python -c "import comet; from comet import download_model; download_model('Unbabel/wmt22-comet-da')" \
&& python -c "import evaluate; evaluate.load('bleurt', 'BLEURT-20')"

# Expose the port the app runs on
EXPOSE $PORT

# Healthcheck to ensure the service is running
HEALTHCHECK CMD curl --fail http://$SERVER:$PORT$BASE/_stcore/health || exit 1

# Set the working directory to the Streamlit app
WORKDIR src/mateo_st

# Simplify the CMD script with conditional --use_cuda flag
CMD streamlit run 01_🎈_MATEO.py \
--server.port $PORT \
--server.address $(if [ "$SERVER" = "localhost" ]; then echo "0.0.0.0"; else echo $SERVER; fi) \
$(if [ -n "$BASE" ]; then echo "--server.baseUrlPath $BASE"; fi) \
$(if [ "$DEMO_MODE" = "true" ]; then echo "--server.maxUploadSize 1"; fi) \
-- \
--use_cuda \
$(if [ "$DEMO_MODE" = "true" ]; then echo "--demo_mode"; fi)
68 changes: 44 additions & 24 deletions docker/hf-spaces/Dockerfile
@@ -1,37 +1,57 @@
FROM ubuntu:latest
LABEL authors="Bram Vanroy"
FROM python:3.11-slim-bookworm

# Metadata as per https://github.com/opencontainers/image-spec/blob/master/annotations.md
LABEL org.opencontainers.image.authors="Bram Vanroy"
LABEL org.opencontainers.image.title="MAchine Translation Evaluation Online - Demo"

# Avoid prompts from apt
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get -y update \
&& apt-get -y install build-essential curl git software-properties-common

RUN add-apt-repository ppa:deadsnakes/ppa \
&& apt-get -y update \
&& apt-get -y install python3.10 python3.10-dev python3-pip python3.10-distutils \
&& ln -s /usr/bin/python3.10 /usr/bin/python \
# Install dependencies in a single RUN command to reduce image layers
RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \
curl \
git \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*

RUN useradd -m -u 1000 user
USER user
ENV HOME /home/user
ENV PATH $HOME/.local/bin:$PATH
ENV HF_HUB_ENABLE_HF_TRANSFER=1
# Create a non-root user
RUN useradd -m -u 1000 mateo_user
USER mateo_user
ENV HOME="/home/mateo_user"

# Environment variables
ENV PORT=7860 \
SERVER="localhost" \
HF_HUB_ENABLE_HF_TRANSFER=1 \
PATH="${HOME}/.local/bin:${PATH}" \
USE_CUDA=false

WORKDIR $HOME
WORKDIR ${HOME}/mateo

# Clone the repository
RUN git clone https://github.com/BramVanroy/mateo-demo.git
WORKDIR $HOME/mateo-demo
WORKDIR mateo-demo

RUN python -m pip install --no-cache-dir --upgrade pip && python -m pip install --no-cache-dir --upgrade .
# Install Python dependencies with conditional torch installation
RUN python -m pip install --no-cache-dir --upgrade pip wheel setuptools \
&& python -m pip install --no-cache-dir torch==2.2.1+cpu -f https://download.pytorch.org/whl/torch \
&& python -m pip install --no-cache-dir --upgrade .

# Pre-download default models
RUN python -c "import comet; from comet import download_model; download_model('Unbabel/wmt22-comet-da')"
RUN python -c "import evaluate; evaluate.load('bleurt', 'BLEURT-20')"
RUN huggingface-cli download bert-base-multilingual-cased model.safetensors tokenizer.json vocab.txt
RUN huggingface-cli download facebook/nllb-200-distilled-600M pytorch_model.bin sentencepiece.bpe.model tokenizer.json
RUN huggingface-cli download bert-base-multilingual-cased model.safetensors tokenizer.json vocab.txt; \
huggingface-cli download facebook/nllb-200-distilled-600M pytorch_model.bin sentencepiece.bpe.model tokenizer.json; \
python -c "import comet; from comet import download_model; download_model('Unbabel/wmt22-comet-da')"; \
python -c "import evaluate; evaluate.load('bleurt', 'BLEURT-20')"

# Expose the port the app runs on
EXPOSE $PORT

EXPOSE 7860
HEALTHCHECK CMD curl --fail http://localhost:7860/_stcore/health
# Healthcheck to ensure the service is running
HEALTHCHECK CMD curl --fail http://$SERVER:$PORT/_stcore/health || exit 1

WORKDIR $HOME/mateo-demo/src/mateo_st
# Set the working directory to the Streamlit app
WORKDIR src/mateo_st

CMD ["streamlit", "run", "01_🎈_MATEO.py", "--server.port", "7860", "--server.enableXsrfProtection", "false", "--", "--no_cuda"]
# Launch app
CMD streamlit run 01_🎈_MATEO.py --server.port $PORT --server.enableXsrfProtection false -- --no_cuda
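
To sanity-check this Spaces image locally, the endpoint used by the HEALTHCHECK above can be probed directly; the tag mateo-space is a placeholder:

# Build and start the Spaces image locally (tag is a placeholder)
docker build -t mateo-space -f docker/hf-spaces/Dockerfile .
docker run --rm -p 7860:7860 mateo-space
# In another shell: the same endpoint the HEALTHCHECK polls
curl --fail http://localhost:7860/_stcore/health
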
2 changes: 1 addition & 1 deletion src/mateo_st/pages/02_📖_Translate.py
@@ -91,7 +91,7 @@ def _swap_languages():
tgt_lang=st.session_state["tgt_lang"],
model_size=cli_args().transl_model_size,
quantize=not cli_args().transl_no_quantize,
no_cuda=cli_args().transl_no_cuda or cli_args().no_cuda,
no_cuda=not cli_args().use_cuda,
)
except KeyError as exc:
load_info.exception(exc)
4 changes: 2 additions & 2 deletions src/mateo_st/translator.py
@@ -19,7 +19,7 @@ class Translator:
src_lang: str
tgt_lang: str
model_size: str = DEFAULT_MODEL_SIZE
no_cuda: bool = False
no_cuda: bool = True
quantize: bool = True
model: Any = field(default=None, init=False)
tokenizer: Any = field(default=None, init=False)
@@ -101,7 +101,7 @@ def batchify(sentences: List[str], batch_size: int):


@st.cache_resource(show_spinner=False, max_entries=1)
def init_model(model_name: str, no_cuda: bool = False, quantize: bool = True):
def init_model(model_name: str, no_cuda: bool = True, quantize: bool = True):
# We defer loading of transformers and optimum because _sometimes_ there are
# import errors triggered if we put them at the top. Don't know why...
from optimum.bettertransformer import BetterTransformer
63 changes: 20 additions & 43 deletions src/mateo_st/utils.py
@@ -1,5 +1,6 @@
import base64
import json
import logging
import os
from argparse import Namespace
from io import BytesIO, StringIO
@@ -10,6 +11,8 @@
import numpy as np
import pandas as pd
import streamlit as st
import torch

from mateo_st.translator import (
DEFAULT_BATCH_SIZE,
DEFAULT_MAX_LENGTH,
@@ -56,63 +59,37 @@ def create_download_link(
def cli_args():
import argparse

defaults = {
"use_cuda": False,
"transl_batch_size": DEFAULT_BATCH_SIZE,
"transl_no_quantize": False,
"transl_model_size": DEFAULT_MODEL_SIZE,
"transl_num_beams": DEFAULT_NUM_BEAMS,
"transl_max_length": DEFAULT_MAX_LENGTH,
"eval_max_sys": 4,
"demo_mode": False,

}
cparser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
cparser.add_argument("--no_cuda", action="store_true", help="whether to disable CUDA for all tasks")
cparser.add_argument("--transl_no_cuda", action="store_true", help="whether to disable CUDA for translation only")
cparser.add_argument(
"--transl_batch_size", type=int, default=DEFAULT_BATCH_SIZE, help="batch size for translating"
)
cparser.add_argument(
"--transl_no_quantize",
action="store_true",
help="whether to disable CUDA torch quantization of the translation model. Quantization makes the model smaller"
" and faster but may result in lower quality. This option will disable quantization.",
)
cparser.add_argument(
"--transl_model_size",
choices=list(TRANS_SIZE2MODEL.keys()),
default=DEFAULT_MODEL_SIZE,
help="translation model size to use",
)
cparser.add_argument(
"--transl_num_beams",
type=int,
default=DEFAULT_NUM_BEAMS,
help="number of beams to allow to generate translations with",
)
cparser.add_argument(
"--transl_max_length",
type=int,
default=DEFAULT_MAX_LENGTH,
help="maximal length to generate per sentence",
)
cparser.add_argument(
"--eval_max_sys",
type=int,
default=4,
help="max. number of systems to compare",
)
cparser.add_argument("--use_cuda", default=False, action="store_true", help="whether to use CUDA. Only affects the translation model")
cparser.add_argument(
"--demo_mode",
action="store_true",
default=False,
help="when demo mode is enabled, only a limited range of neural check-points are available. So all metrics are"
" available but not all of the checkpoints.",
)
cparser.add_argument(
"--config",
help="an optional JSON config file that contains script arguments. NOTE: options specified in this file will"
" overwrite those given in the command-line.",
)

args = cparser.parse_args()

config_file_args = json.loads(Path(args.config).read_text(encoding="utf-8")) if args.config else {}
# Options specified in the JSON config overwrite CLI args
args = Namespace(**{**vars(args), **config_file_args})
args = Namespace(**{**defaults, **vars(args)})
if not torch.cuda.is_available():
args.use_cuda = False
logging.warning("CUDA is not available on this system. Disabling it.")

# Disable CUDA for everything
if args.no_cuda:
if not args.use_cuda:
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

return args
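
With the argparse change above, CUDA is now opt-in via --use_cuda instead of opt-out via --no_cuda/--transl_no_cuda. A usage sketch, with flag names taken from the diff and a placeholder config path:

# Old behaviour: CUDA on by default, disabled explicitly
streamlit run 01_🎈_MATEO.py -- --no_cuda
# New behaviour: CUDA off by default, enabled explicitly
# (falls back to CPU with a warning if torch reports no CUDA device)
streamlit run 01_🎈_MATEO.py -- --use_cuda
# Arguments can also come from a JSON config, which overrides command-line options
streamlit run 01_🎈_MATEO.py -- --config my_config.json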
