Docker improvements, remove CUDA default (#14)
* change default to no cuda

* update Dockerfiles
BramVanroy authored Mar 5, 2024
1 parent 45e3fcf commit 62197dd
Showing 7 changed files with 201 additions and 127 deletions.
65 changes: 65 additions & 0 deletions docker/cpu/Dockerfile
@@ -0,0 +1,65 @@
FROM python:3.11-slim-bookworm

# Metadata as per https://github.com/opencontainers/image-spec/blob/master/annotations.md
LABEL org.opencontainers.image.authors="Bram Vanroy"
LABEL org.opencontainers.image.title="MAchine Translation Evaluation Online - Demo"

# Avoid prompts from apt
ENV DEBIAN_FRONTEND=noninteractive

# Install dependencies in a single RUN command to reduce image layers
RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \
curl \
git \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*

# Create a non-root user
RUN useradd -m -u 1000 mateo_user
USER mateo_user
ENV HOME="/home/mateo_user"

# Environment variables
ENV PORT=7860 \
SERVER="localhost" \
BASE="" \
DEMO_MODE="" \
HF_HUB_ENABLE_HF_TRANSFER=1 \
PATH="${HOME}/.local/bin:${PATH}" \
USE_CUDA=false

WORKDIR ${HOME}/mateo

# Clone the repository
RUN git clone https://github.com/BramVanroy/mateo-demo.git
WORKDIR mateo-demo

# Install Python dependencies with conditional torch installation
RUN python -m pip install --no-cache-dir --upgrade pip wheel setuptools \
&& python -m pip install --no-cache-dir torch==2.2.1+cpu -f https://download.pytorch.org/whl/torch \
&& python -m pip install --no-cache-dir --upgrade .

# Pre-download default models
RUN huggingface-cli download bert-base-multilingual-cased model.safetensors tokenizer.json vocab.txt; \
huggingface-cli download facebook/nllb-200-distilled-600M pytorch_model.bin sentencepiece.bpe.model tokenizer.json; \
python -c "import comet; from comet import download_model; download_model('Unbabel/wmt22-comet-da')"; \
python -c "import evaluate; evaluate.load('bleurt', 'BLEURT-20')"

# Expose the port the app runs on
EXPOSE $PORT

# Healthcheck to ensure the service is running
HEALTHCHECK CMD curl --fail http://$SERVER:$PORT$BASE/_stcore/health || exit 1

# Set the working directory to the Streamlit app
WORKDIR src/mateo_st

# Simplify the CMD script with conditional --use_cuda flag
CMD streamlit run 01_🎈_MATEO.py \
--server.port $PORT \
--server.address $(if [ "$SERVER" = "localhost" ]; then echo "0.0.0.0"; else echo $SERVER; fi) \
$(if [ -n "$BASE" ]; then echo "--server.baseUrlPath $BASE"; fi) \
$(if [ "$DEMO_MODE" = "true" ]; then echo "--server.maxUploadSize 1"; fi) \
-- \
$(if [ "$DEMO_MODE" = "true" ]; then echo "--demo_mode"; fi)
57 changes: 0 additions & 57 deletions docker/default/Dockerfile

This file was deleted.

69 changes: 69 additions & 0 deletions docker/gpu/Dockerfile
@@ -0,0 +1,69 @@
# includes python3.10 (but we use `python-is-python3` to set `python` cmd to `python3` for ease-of-use)
FROM nvidia/cuda:12.1.1-runtime-ubuntu22.04

# Metadata as per https://github.com/opencontainers/image-spec/blob/master/annotations.md
LABEL org.opencontainers.image.authors="Bram Vanroy"
LABEL org.opencontainers.image.title="MAchine Translation Evaluation Online - Demo"

# Avoid prompts from apt
ENV DEBIAN_FRONTEND=noninteractive

# Install dependencies in a single RUN command to reduce image layers
RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \
curl \
git \
python3-pip \
python-is-python3 \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*

# Create a non-root user
RUN useradd -m -u 1000 mateo_user
USER mateo_user
ENV HOME="/home/mateo_user"

# Environment variables
ENV PORT=7860 \
SERVER="localhost" \
BASE="" \
DEMO_MODE="" \
HF_HUB_ENABLE_HF_TRANSFER=1 \
PATH="${HOME}/.local/bin:${PATH}" \
USE_CUDA=true

WORKDIR /mateo

# Clone the repository
RUN git clone https://github.com/BramVanroy/mateo-demo.git
WORKDIR mateo-demo

# Install Python dependencies with conditional torch installation
RUN python -m pip install --no-cache-dir --upgrade pip wheel setuptools \
&& python -m pip install --no-cache-dir torch==2.2.1 --index-url https://download.pytorch.org/whl/cu121 \
&& python -m pip install --no-cache-dir --upgrade .

# Pre-download default models
RUN huggingface-cli download bert-base-multilingual-cased model.safetensors tokenizer.json vocab.txt \
&& huggingface-cli download facebook/nllb-200-distilled-600M pytorch_model.bin sentencepiece.bpe.model tokenizer.json \
&& python -c "import comet; from comet import download_model; download_model('Unbabel/wmt22-comet-da')" \
&& python -c "import evaluate; evaluate.load('bleurt', 'BLEURT-20')"

# Expose the port the app runs on
EXPOSE $PORT

# Healthcheck to ensure the service is running
HEALTHCHECK CMD curl --fail http://$SERVER:$PORT$BASE/_stcore/health || exit 1

# Set the working directory to the Streamlit app
WORKDIR src/mateo_st

# Simplify the CMD script with conditional --use_cuda flag
CMD streamlit run 01_🎈_MATEO.py \
--server.port $PORT \
--server.address $(if [ "$SERVER" = "localhost" ]; then echo "0.0.0.0"; else echo $SERVER; fi) \
$(if [ -n "$BASE" ]; then echo "--server.baseUrlPath $BASE"; fi) \
$(if [ "$DEMO_MODE" = "true" ]; then echo "--server.maxUploadSize 1"; fi) \
-- \
--use_cuda \
$(if [ "$DEMO_MODE" = "true" ]; then echo "--demo_mode"; fi)
68 changes: 44 additions & 24 deletions docker/hf-spaces/Dockerfile
@@ -1,37 +1,57 @@
FROM ubuntu:latest
LABEL authors="Bram Vanroy"
FROM python:3.11-slim-bookworm

# Metadata as per https://github.com/opencontainers/image-spec/blob/master/annotations.md
LABEL org.opencontainers.image.authors="Bram Vanroy"
LABEL org.opencontainers.image.title="MAchine Translation Evaluation Online - Demo"

# Avoid prompts from apt
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get -y update \
&& apt-get -y install build-essential curl git software-properties-common

RUN add-apt-repository ppa:deadsnakes/ppa \
&& apt-get -y update \
&& apt-get -y install python3.10 python3.10-dev python3-pip python3.10-distutils \
&& ln -s /usr/bin/python3.10 /usr/bin/python \
# Install dependencies in a single RUN command to reduce image layers
RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \
curl \
git \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*

RUN useradd -m -u 1000 user
USER user
ENV HOME /home/user
ENV PATH $HOME/.local/bin:$PATH
ENV HF_HUB_ENABLE_HF_TRANSFER=1
# Create a non-root user
RUN useradd -m -u 1000 mateo_user
USER mateo_user
ENV HOME="/home/mateo_user"

# Environment variables
ENV PORT=7860 \
SERVER="localhost" \
HF_HUB_ENABLE_HF_TRANSFER=1 \
PATH="${HOME}/.local/bin:${PATH}" \
USE_CUDA=false

WORKDIR $HOME
WORKDIR ${HOME}/mateo

# Clone the repository
RUN git clone https://github.com/BramVanroy/mateo-demo.git
WORKDIR $HOME/mateo-demo
WORKDIR mateo-demo

RUN python -m pip install --no-cache-dir --upgrade pip && python -m pip install --no-cache-dir --upgrade .
# Install Python dependencies with conditional torch installation
RUN python -m pip install --no-cache-dir --upgrade pip wheel setuptools \
&& python -m pip install --no-cache-dir torch==2.2.1+cpu -f https://download.pytorch.org/whl/torch \
&& python -m pip install --no-cache-dir --upgrade .

# Pre-download default models
RUN python -c "import comet; from comet import download_model; download_model('Unbabel/wmt22-comet-da')"
RUN python -c "import evaluate; evaluate.load('bleurt', 'BLEURT-20')"
RUN huggingface-cli download bert-base-multilingual-cased model.safetensors tokenizer.json vocab.txt
RUN huggingface-cli download facebook/nllb-200-distilled-600M pytorch_model.bin sentencepiece.bpe.model tokenizer.json
RUN huggingface-cli download bert-base-multilingual-cased model.safetensors tokenizer.json vocab.txt; \
huggingface-cli download facebook/nllb-200-distilled-600M pytorch_model.bin sentencepiece.bpe.model tokenizer.json; \
python -c "import comet; from comet import download_model; download_model('Unbabel/wmt22-comet-da')"; \
python -c "import evaluate; evaluate.load('bleurt', 'BLEURT-20')"

# Expose the port the app runs on
EXPOSE $PORT

EXPOSE 7860
HEALTHCHECK CMD curl --fail http://localhost:7860/_stcore/health
# Healthcheck to ensure the service is running
HEALTHCHECK CMD curl --fail http://$SERVER:$PORT/_stcore/health || exit 1

WORKDIR $HOME/mateo-demo/src/mateo_st
# Set the working directory to the Streamlit app
WORKDIR src/mateo_st

CMD ["streamlit", "run", "01_🎈_MATEO.py", "--server.port", "7860", "--server.enableXsrfProtection", "false", "--", "--no_cuda"]
# Launch app
CMD streamlit run 01_🎈_MATEO.py --server.port $PORT --server.enableXsrfProtection false -- --no_cuda
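
To sanity-check this Spaces image locally, the endpoint used by the HEALTHCHECK above can be probed directly; the tag mateo-space is a placeholder:

# Build and start the Spaces image locally (tag is a placeholder)
docker build -t mateo-space -f docker/hf-spaces/Dockerfile .
docker run --rm -p 7860:7860 mateo-space
# In another shell: the same endpoint the HEALTHCHECK polls
curl --fail http://localhost:7860/_stcore/health
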
2 changes: 1 addition & 1 deletion src/mateo_st/pages/02_📖_Translate.py
@@ -91,7 +91,7 @@ def _swap_languages():
tgt_lang=st.session_state["tgt_lang"],
model_size=cli_args().transl_model_size,
quantize=not cli_args().transl_no_quantize,
no_cuda=cli_args().transl_no_cuda or cli_args().no_cuda,
no_cuda=not cli_args().use_cuda,
)
except KeyError as exc:
load_info.exception(exc)
4 changes: 2 additions & 2 deletions src/mateo_st/translator.py
@@ -19,7 +19,7 @@ class Translator:
src_lang: str
tgt_lang: str
model_size: str = DEFAULT_MODEL_SIZE
no_cuda: bool = False
no_cuda: bool = True
quantize: bool = True
model: Any = field(default=None, init=False)
tokenizer: Any = field(default=None, init=False)
@@ -101,7 +101,7 @@ def batchify(sentences: List[str], batch_size: int):


@st.cache_resource(show_spinner=False, max_entries=1)
def init_model(model_name: str, no_cuda: bool = False, quantize: bool = True):
def init_model(model_name: str, no_cuda: bool = True, quantize: bool = True):
# We defer loading of transformers and optimum because _sometimes_ there are
# import errors triggered if we put them at the top. Don't know why...
from optimum.bettertransformer import BetterTransformer
63 changes: 20 additions & 43 deletions src/mateo_st/utils.py
@@ -1,5 +1,6 @@
import base64
import json
import logging
import os
from argparse import Namespace
from io import BytesIO, StringIO
@@ -10,6 +11,8 @@
import numpy as np
import pandas as pd
import streamlit as st
import torch

from mateo_st.translator import (
DEFAULT_BATCH_SIZE,
DEFAULT_MAX_LENGTH,
@@ -56,63 +59,37 @@ def create_download_link(
def cli_args():
import argparse

defaults = {
"use_cuda": False,
"transl_batch_size": DEFAULT_BATCH_SIZE,
"transl_no_quantize": False,
"transl_model_size": DEFAULT_MODEL_SIZE,
"transl_num_beams": DEFAULT_NUM_BEAMS,
"transl_max_length": DEFAULT_MAX_LENGTH,
"eval_max_sys": 4,
"demo_mode": False,

}
cparser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
cparser.add_argument("--no_cuda", action="store_true", help="whether to disable CUDA for all tasks")
cparser.add_argument("--transl_no_cuda", action="store_true", help="whether to disable CUDA for translation only")
cparser.add_argument(
"--transl_batch_size", type=int, default=DEFAULT_BATCH_SIZE, help="batch size for translating"
)
cparser.add_argument(
"--transl_no_quantize",
action="store_true",
help="whether to disable CUDA torch quantization of the translation model. Quantization makes the model smaller"
" and faster but may result in lower quality. This option will disable quantization.",
)
cparser.add_argument(
"--transl_model_size",
choices=list(TRANS_SIZE2MODEL.keys()),
default=DEFAULT_MODEL_SIZE,
help="translation model size to use",
)
cparser.add_argument(
"--transl_num_beams",
type=int,
default=DEFAULT_NUM_BEAMS,
help="number of beams to allow to generate translations with",
)
cparser.add_argument(
"--transl_max_length",
type=int,
default=DEFAULT_MAX_LENGTH,
help="maximal length to generate per sentence",
)
cparser.add_argument(
"--eval_max_sys",
type=int,
default=4,
help="max. number of systems to compare",
)
cparser.add_argument("--use_cuda", default=False, action="store_true", help="whether to use CUDA. Only affects the translation model")
cparser.add_argument(
"--demo_mode",
action="store_true",
default=False,
help="when demo mode is enabled, only a limited range of neural check-points are available. So all metrics are"
" available but not all of the checkpoints.",
)
cparser.add_argument(
"--config",
help="an optional JSON config file that contains script arguments. NOTE: options specified in this file will"
" overwrite those given in the command-line.",
)

args = cparser.parse_args()

config_file_args = json.loads(Path(args.config).read_text(encoding="utf-8")) if args.config else {}
# Options specified in the JSON config overwrite CLI args
args = Namespace(**{**vars(args), **config_file_args})
args = Namespace(**{**defaults, **vars(args)})
if not torch.cuda.is_available():
args.use_cuda = False
logging.warning("CUDA is not available on this system. Disabling it.")

# Disable CUDA for everything
if args.no_cuda:
if not args.use_cuda:
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

return args
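
With the argparse change above, CUDA is now opt-in via --use_cuda instead of opt-out via --no_cuda/--transl_no_cuda. A usage sketch, with flag names taken from the diff and a placeholder config path:

# Old behaviour: CUDA on by default, disabled explicitly
streamlit run 01_🎈_MATEO.py -- --no_cuda
# New behaviour: CUDA off by default, enabled explicitly
# (falls back to CPU with a warning if torch reports no CUDA device)
streamlit run 01_🎈_MATEO.py -- --use_cuda
# Arguments can also come from a JSON config, which overrides command-line options
streamlit run 01_🎈_MATEO.py -- --config my_config.json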
