forked from ravenscroftj/turbopilot
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDockerfile.cuda12
37 lines (21 loc) · 1.07 KB
/
Dockerfile.cuda12
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# Build stage: compiles the turbopilot binary with cuBLAS enabled on top of the
# CUDA 12.2 development image. Only the resulting binary is meant to leave this
# stage (via COPY --from=build in a later stage).
FROM nvidia/cuda:12.2.0-devel-ubuntu20.04 AS build

# Build-time only: silences debconf prompts during package installation.
# ARG (rather than ENV) keeps it out of the environment of the resulting image.
ARG DEBIAN_FRONTEND=noninteractive

# Make every piped RUN below fail if ANY command in the pipeline fails
# (the default /bin/sh only reports the status of the last command).
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Include the Kitware apt repo to allow us to grab the latest cmake.
# -y is required: DEBIAN_FRONTEND does not answer apt's own "Continue? [Y/n]"
# prompt, and without a TTY apt aborts the install.
RUN apt-get update && apt-get install -y ca-certificates gpg wget

# Fetch Kitware's signing key and store it de-armored as a dedicated keyring.
# With pipefail set, a failed download now fails the build instead of silently
# writing an empty keyring.
RUN wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null \
    | gpg --dearmor - \
    | tee /usr/share/keyrings/kitware-archive-keyring.gpg >/dev/null

# Register the Kitware repository for Ubuntu focal (matches the 20.04 base image),
# trusted only through the keyring written above.
RUN echo 'deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ focal main' \
    | tee /etc/apt/sources.list.d/kitware.list >/dev/null

# Re-run update so the newly added repository is indexed, then install the
# toolchain and the Boost packages used by the build.
RUN apt-get update && apt-get install -y \
        build-essential \
        cmake \
        libboost-dev \
        libboost-thread-dev

# Copy the whole build context into the image. COPY is used instead of ADD
# because nothing here needs ADD's archive-extraction or URL behaviour.
COPY ./ /turbopilot

# WORKDIR creates the directory when it is missing, so no separate mkdir is
# needed (and the build no longer fails if the context already has a build/ dir).
WORKDIR /turbopilot/build

# Configure with cuBLAS enabled, pointing CMake at the nvcc binary explicitly.
RUN cmake -DGGML_CUBLAS=ON -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc ..

# Build only the turbopilot target.
RUN make turbopilot
# Runtime stage: CUDA 12.2 runtime image that carries only the compiled binary
# and its launcher script, without the compiler toolchain.
FROM nvidia/cuda:12.2.0-runtime-ubuntu20.04 AS runtime

WORKDIR /app

# Pull the binary produced by the build stage.
COPY --from=build /turbopilot/build/bin/turbopilot /app/turbopilot

# Default settings, overridable with `docker run -e ...`.
# NOTE(review): presumably read by run.sh to build the turbopilot command
# line - confirm against run.sh, which is not visible here.
ENV THREADS=4 \
    MODEL="/models/codegen-2B-multi-ggml-4bit-quant.bin" \
    BATCHSIZE=64

COPY ./run.sh /app/

# Documents the port the container is expected to serve on; EXPOSE itself does
# not publish the port.
EXPOSE 18080

# Exec (JSON) form: the script is started directly instead of through
# `/bin/sh -c`, so signals from `docker stop` reach it rather than a wrapper shell.
# NOTE(review): this requires run.sh to be executable and to start with a
# shebang line - verify before merging.
CMD ["/app/run.sh"]