# huggingface_server.Dockerfile
# Source: a fork of kserve/kserve.
# Global build arguments. They are declared before the first FROM, so a stage
# that needs one of them has to re-declare it with a bare `ARG <name>`.
# VENV_PATH: location of the Python virtual environment (re-declared by the stages below).
ARG VENV_PATH="/prod_venv"
# BASE_IMAGE: image the builder stage starts from.
ARG BASE_IMAGE="nvidia/cuda:12.1.0-devel-ubuntu22.04"
# ---- builder stage ----
# Creates the virtual environment at ${VENV_PATH} and installs the kserve and
# huggingfaceserver projects plus vllm into it. The project sources end up in
# /kserve and /huggingfaceserver, which is where the prod stage copies them from.
FROM ${BASE_IMAGE} AS builder

# OS packages. python3.10-venv backs the `python3 -m venv` calls below; gcc and
# python3-dev are presumably needed by dependencies that compile native code.
# The apt lists are deleted in the same RUN so they are not stored in the layer.
RUN apt-get update -y \
    && apt-get install -y \
        gcc \
        python3-dev \
        python3.10-venv \
    && rm -rf /var/lib/apt/lists/*

# Install Poetry into its own virtual environment (POETRY_HOME), separate from
# the one that receives the application dependencies.
# These ARGs are declared after the apt layer so that overriding them does not
# invalidate it.
ARG POETRY_HOME=/opt/poetry
ARG POETRY_VERSION=1.7.1
RUN python3 -m venv ${POETRY_HOME} \
    && ${POETRY_HOME}/bin/pip3 install --no-cache-dir poetry==${POETRY_VERSION}
ENV PATH="$PATH:${POETRY_HOME}/bin"

# Activate virtual env: create it and put its bin/ first on PATH so the
# `python3`/`pip3` used from here on are the ones inside it.
ARG VENV_PATH
ENV VIRTUAL_ENV=${VENV_PATH}
RUN python3 -m venv $VIRTUAL_ENV
ENV PATH="$VIRTUAL_ENV/bin:$PATH"

# kserve: copy only the dependency manifests first so the dependency layer can
# be reused when just the sources change, then copy the sources and install the
# project itself.
WORKDIR /kserve
COPY kserve/pyproject.toml kserve/poetry.lock /kserve/
RUN poetry install --no-root --no-interaction --no-cache
COPY kserve /kserve
RUN poetry install --no-interaction --no-cache

# huggingfaceserver: same two-step pattern as for kserve.
WORKDIR /huggingfaceserver
COPY huggingfaceserver/pyproject.toml huggingfaceserver/poetry.lock /huggingfaceserver/
RUN poetry install --no-root --no-interaction --no-cache
COPY huggingfaceserver /huggingfaceserver
RUN poetry install --no-interaction --no-cache

# Install vllm into the active virtual environment. The version ARG is declared
# here, next to its only use, so changing it only invalidates this layer.
ARG VLLM_VERSION=0.2.7
RUN pip3 install --no-cache-dir vllm==${VLLM_VERSION}
# ---- prod stage ----
# Runtime image: starts from the CUDA "base" image and receives the virtual
# environment and the project sources from the builder stage.
FROM nvidia/cuda:12.1.0-base-ubuntu22.04 AS prod

# python3.10-venv is presumably installed to provide the system Python 3.10
# that the copied virtual environment relies on.
# The apt lists are deleted in the same RUN so they do not end up in the image.
RUN apt-get update -y \
    && apt-get install -y python3.10-venv \
    && rm -rf /var/lib/apt/lists/*

COPY third_party third_party

# Activate virtual env: same location as in the builder, first on PATH so the
# ENTRYPOINT's `python3` resolves to the interpreter inside it.
ARG VENV_PATH
ENV VIRTUAL_ENV=${VENV_PATH}
ENV PATH="$VIRTUAL_ENV/bin:$PATH"

# Unprivileged runtime user with a fixed UID (matches `USER 1000` below).
RUN useradd kserve -m -u 1000 -d /home/kserve

# Bring over the virtual environment (owned by the runtime user) and the
# project sources. Absolute paths keep the sources at the same location they
# had in the builder, independent of the working directory.
COPY --from=builder --chown=kserve:kserve $VIRTUAL_ENV $VIRTUAL_ENV
COPY --from=builder /kserve /kserve
COPY --from=builder /huggingfaceserver /huggingfaceserver

USER 1000
ENTRYPOINT ["python3", "-m", "huggingfaceserver"]