Commit

Added local LLM functionality by incorporating text-generation-webui
sirajperson committed Jun 9, 2023
1 parent 88c52d7 commit cf7a802
Showing 39 changed files with 1,723 additions and 2,213 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
@@ -8,4 +8,4 @@ COPY entrypoint.sh /entrypoint.sh
COPY wait-for-it.sh /wait-for-it.sh
RUN chmod +x /entrypoint.sh /wait-for-it.sh

-CMD ["/wait-for-it.sh", "super__postgres:5432","-t","60","--","/entrypoint.sh"]
\ No newline at end of file
+CMD ["/wait-for-it.sh", "super__postgres:5432","-t","60","--","/entrypoint.sh"]
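
Note: wait-for-it.sh is the widely used Bash helper that blocks until a TCP host:port becomes reachable, so the CMD above is equivalent to the following shell invocation (shown for illustration):

# Wait up to 60 seconds for Postgres to accept connections,
# then hand off to the application entrypoint.
/wait-for-it.sh super__postgres:5432 -t 60 -- /entrypoint.sh
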
2 changes: 1 addition & 1 deletion DockerfileCelery
@@ -8,4 +8,4 @@ RUN pip install --no-cache-dir -r requirements.txt
COPY . .
COPY config.yaml .

-CMD ["celery", "-A", "superagi.worker", "worker", "--loglevel=info"]
\ No newline at end of file
+CMD ["celery", "-A", "superagi.worker", "worker", "--loglevel=info"]
115 changes: 115 additions & 0 deletions DockerfileTGWUI
@@ -0,0 +1,115 @@
FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 AS env_base
# Pre-reqs
RUN apt-get update && apt-get install --no-install-recommends -y \
git vim build-essential python3-dev python3-venv python3-pip
# Instantiate venv and pre-activate
RUN pip3 install virtualenv
RUN virtualenv /venv
# Credit: Itamar Turner-Trauring, https://pythonspeed.com/articles/activate-virtualenv-dockerfile/
ENV VIRTUAL_ENV=/venv
RUN python3 -m venv $VIRTUAL_ENV
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
RUN pip3 install --upgrade pip setuptools && \
pip3 install torch torchvision torchaudio

FROM env_base AS app_base
### DEVELOPERS/ADVANCED USERS ###
# Clone oobabooga/text-generation-webui
RUN git clone https://github.com/oobabooga/text-generation-webui /src
# Required for multi-GPU support until the main branch updates requirements.txt to include llama-cpp-python 0.1.59 or greater.
COPY ./tgwui_requirements.txt /src/requirements.txt
# To use local source: comment out the git clone command then set the build arg `LCL_SRC_DIR`
#ARG LCL_SRC_DIR="text-generation-webui"
#COPY ${LCL_SRC_DIR} /src

#################################
# Copy source to app
RUN cp -ar /src /app
# Install oobabooga/text-generation-webui
RUN --mount=type=cache,target=/root/.cache/pip pip3 install -r /app/requirements.txt
# Install extensions
COPY ./tgwui_scripts/build_extensions.sh /tgwui_scripts/build_extensions.sh
RUN --mount=type=cache,target=/root/.cache/pip \
chmod +x /tgwui_scripts/build_extensions.sh && . /tgwui_scripts/build_extensions.sh
# Clone default GPTQ
#RUN git clone https://github.com/oobabooga/GPTQ-for-LLaMa.git -b cuda /app/repositories/GPTQ-for-LLaMa
# Build and install default GPTQ ('quant_cuda')
#ARG TORCH_CUDA_ARCH_LIST="6.1;7.0;7.5;8.0;8.6+PTX"
#RUN cd /app/repositories/GPTQ-for-LLaMa/ && python3 setup_cuda.py install

FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 AS base
# Runtime pre-reqs
RUN apt-get update && apt-get install --no-install-recommends -y \
python3-venv python3-dev git
# Copy app and src
COPY --from=app_base /app /app
COPY --from=app_base /src /src
# Copy and activate venv
COPY --from=app_base /venv /venv
ENV VIRTUAL_ENV=/venv
RUN python3 -m venv $VIRTUAL_ENV
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
# Finalise app setup
WORKDIR /app
EXPOSE 7860
EXPOSE 5000
EXPOSE 5005
EXPOSE 5001
# Required for Python print statements to appear in logs
ENV PYTHONUNBUFFERED=1
# Force variant layers to sync cache by setting --build-arg BUILD_DATE
ARG BUILD_DATE
ENV BUILD_DATE=$BUILD_DATE
RUN echo "$BUILD_DATE" > /build_date.txt
# Run
COPY ./tgwui_scripts/docker-entrypoint.sh /tgwui_scripts/docker-entrypoint.sh
RUN chmod +x /tgwui_scripts/docker-entrypoint.sh
ENTRYPOINT ["/tgwui_scripts/docker-entrypoint.sh"]


# VARIANT BUILDS
FROM base AS cuda
RUN echo "CUDA" >> /variant.txt
RUN apt-get install --no-install-recommends -y git python3-dev python3-pip
RUN rm -rf /app/repositories/GPTQ-for-LLaMa && \
git clone https://github.com/qwopqwop200/GPTQ-for-LLaMa -b cuda /app/repositories/GPTQ-for-LLaMa
RUN pip3 uninstall -y quant-cuda && \
sed -i 's/^safetensors==0\.3\.0$/safetensors/g' /app/repositories/GPTQ-for-LLaMa/requirements.txt && \
pip3 install -r /app/repositories/GPTQ-for-LLaMa/requirements.txt
ENV EXTRA_LAUNCH_ARGS=""
CMD ["python3", "/app/server.py"]

FROM base AS triton
RUN echo "TRITON" >> /variant.txt
RUN apt-get install --no-install-recommends -y git python3-dev build-essential python3-pip
RUN rm -rf /app/repositories/GPTQ-for-LLaMa && \
git clone https://github.com/qwopqwop200/GPTQ-for-LLaMa -b triton /app/repositories/GPTQ-for-LLaMa
RUN pip3 uninstall -y quant-cuda && \
sed -i 's/^safetensors==0\.3\.0$/safetensors/g' /app/repositories/GPTQ-for-LLaMa/requirements.txt && \
pip3 install -r /app/repositories/GPTQ-for-LLaMa/requirements.txt
ENV EXTRA_LAUNCH_ARGS=""
CMD ["python3", "/app/server.py"]

FROM base AS llama-cublas
RUN echo "LLAMA-CUBLAS" >> /variant.txt
RUN apt-get install --no-install-recommends -y git python3-dev build-essential python3-pip
ENV LLAMA_CUBLAS=1
RUN pip uninstall -y llama-cpp-python && \
CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python
ENV EXTRA_LAUNCH_ARGS=""
CMD ["python3", "/app/server.py"]

FROM base AS monkey-patch
RUN echo "4-BIT MONKEY-PATCH" >> /variant.txt
RUN apt-get install --no-install-recommends -y git python3-dev build-essential python3-pip
RUN git clone https://github.com/johnsmith0031/alpaca_lora_4bit /app/repositories/alpaca_lora_4bit && \
cd /app/repositories/alpaca_lora_4bit && git checkout 2f704b93c961bf202937b10aac9322b092afdce0
ARG TORCH_CUDA_ARCH_LIST="8.6"
RUN pip install git+https://github.com/sterlind/GPTQ-for-LLaMa.git@lora_4bit
ENV EXTRA_LAUNCH_ARGS=""
CMD ["python3", "/app/server.py", "--monkey-patch"]

FROM base AS default
RUN echo "DEFAULT" >> /variant.txt
ENV EXTRA_LAUNCH_ARGS=""
CMD ["python3", "/app/server.py"]
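
Note: the trailing stages (cuda, triton, llama-cublas, monkey-patch, default) are alternative build targets selected with --target. A sketch of building one variant manually (the image tag here is illustrative):

# Build the llama-cublas variant; BUILD_DATE invalidates the
# variant layers' cache so they are rebuilt.
docker build -f DockerfileTGWUI --target llama-cublas \
    --build-arg BUILD_DATE="$(date -u +%Y-%m-%d)" \
    -t text-generation-webui:llama-cublas .
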
46 changes: 43 additions & 3 deletions docker-compose.yaml
@@ -8,23 +8,25 @@ services:
ports:
- "8001:8001"
depends_on:
- tgwui
- super__redis
- super__postgres
networks:
- super_network

celery:
volumes:
- "./:/app"
build:
context: .
dockerfile: DockerfileCelery
depends_on:
- tgwui
- super__redis
- super__postgres
networks:
- super_network

gui:
build: ./gui
ports:
@@ -38,6 +40,44 @@ services:
- /app/node_modules
- /app/.next

tgwui:
build:
context: .
target: llama-cublas # Specify the variant to build
dockerfile: DockerfileTGWUI
args:
- LCL_SRC_DIR=text-generation-webui # Developers - see Dockerfile app_base
container_name: text-generation-webui
environment:
- EXTRA_LAUNCH_ARGS="--chat --listen --no-mmap --verbose --extensions openai --gpu-memory 22 22 --n-gpu-layers 100 --threads 8 --model 30b-Lazarus.ggmlv3.q8_0.bin --auto-devices" # Custom launch args (e.g., --model MODEL_NAME)
ports:
- 7860:7860 # Default web port
- 5000:5000 # Default API port
- 5005:5005 # Default streaming port
- 5001:5001 # Default OpenAI API extension port
volumes:
- ./tgwui_config/loras:/app/loras
- ./tgwui_config/models:/app/models
- ./tgwui_config/presets:/app/presets
- ./tgwui_config/prompts:/app/prompts
- ./tgwui_config/softprompts:/app/softprompts
- ./tgwui_config/training:/app/training
logging:
driver: json-file
options:
max-file: "3" # number of rotated log files to keep
max-size: '10m'
networks:
- super_network
deploy:
resources:
reservations:
devices:
- driver: nvidia
device_ids: ['0', '1'] # one entry per GPU
capabilities: [gpu]


super__redis:
image: "redis:latest"
networks:
@@ -61,4 +101,4 @@
driver: bridge

volumes:
-  superagi_postgres_data:
\ No newline at end of file
+  superagi_postgres_data:
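
Note: with the openai extension enabled via EXTRA_LAUNCH_ARGS, the tgwui service should expose an OpenAI-compatible API on port 5001. A minimal smoke test, assuming the extension's default /v1/completions route:

# Start the text-generation-webui service and issue a test completion.
docker compose up -d --build tgwui
curl -s http://localhost:5001/v1/completions \
    -H 'Content-Type: application/json' \
    -d '{"prompt": "Hello", "max_tokens": 16}'
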
64 changes: 64 additions & 0 deletions docker-compose.yaml.bak
@@ -0,0 +1,64 @@
version: '3.8'

services:
backend:
volumes:
- "./:/app"
build: .
ports:
- "8001:8001"
depends_on:
- super__redis
- super__postgres
networks:
- super_network

celery:
volumes:
- "./:/app"
build:
context: .
dockerfile: DockerfileCelery
depends_on:
- super__redis
- super__postgres
networks:
- super_network

gui:
build: ./gui
ports:
- "3000:3000"
environment:
- NEXT_PUBLIC_API_BASE_URL=http://localhost:8001
networks:
- super_network
volumes:
- ./gui:/app
- /app/node_modules
- /app/.next

super__redis:
image: "redis:latest"
networks:
- super_network

super__postgres:
image: "postgres:latest"
environment:
- POSTGRES_USER=superagi
- POSTGRES_PASSWORD=password
- POSTGRES_DB=super_agi_main
volumes:
- superagi_postgres_data:/var/lib/postgresql/data/
networks:
- super_network
ports:
- "5432:5432"

networks:
super_network:
driver: bridge

volumes:
superagi_postgres_data: