Commit

Added local LLM functionality by incorporating text-generation-webui
sirajperson committed Jun 9, 2023
1 parent 88c52d7 commit cf7a802
Showing 39 changed files with 1,723 additions and 2,213 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
@@ -8,4 +8,4 @@ COPY entrypoint.sh /entrypoint.sh
COPY wait-for-it.sh /wait-for-it.sh
RUN chmod +x /entrypoint.sh /wait-for-it.sh

-CMD ["/wait-for-it.sh", "super__postgres:5432","-t","60","--","/entrypoint.sh"]
\ No newline at end of file
+CMD ["/wait-for-it.sh", "super__postgres:5432","-t","60","--","/entrypoint.sh"]
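
Note: wait-for-it.sh is the widely used Bash helper that blocks until a TCP host:port becomes reachable, so the CMD above is equivalent to the following shell invocation (shown for illustration):

# Wait up to 60 seconds for Postgres to accept connections,
# then hand off to the application entrypoint.
/wait-for-it.sh super__postgres:5432 -t 60 -- /entrypoint.sh
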
2 changes: 1 addition & 1 deletion DockerfileCelery
@@ -8,4 +8,4 @@ RUN pip install --no-cache-dir -r requirements.txt
COPY . .
COPY config.yaml .

-CMD ["celery", "-A", "superagi.worker", "worker", "--loglevel=info"]
\ No newline at end of file
+CMD ["celery", "-A", "superagi.worker", "worker", "--loglevel=info"]
115 changes: 115 additions & 0 deletions DockerfileTGWUI
@@ -0,0 +1,115 @@
FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 AS env_base
# Pre-reqs
RUN apt-get update && apt-get install --no-install-recommends -y \
git vim build-essential python3-dev python3-venv python3-pip
# Instantiate venv and pre-activate
RUN pip3 install virtualenv
RUN virtualenv /venv
# Credit: Itamar Turner-Trauring, https://pythonspeed.com/articles/activate-virtualenv-dockerfile/
ENV VIRTUAL_ENV=/venv
RUN python3 -m venv $VIRTUAL_ENV
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
RUN pip3 install --upgrade pip setuptools && \
pip3 install torch torchvision torchaudio

FROM env_base AS app_base
### DEVELOPERS/ADVANCED USERS ###
# Clone oobabooga/text-generation-webui
RUN git clone https://github.com/oobabooga/text-generation-webui /src
# Required for multi-GPU support until the main branch updates requirements.txt to include llama-cpp-python 0.1.59 or greater.
COPY ./tgwui_requirements.txt /src/requirements.txt
# To use local source: comment out the git clone command then set the build arg `LCL_SRC_DIR`
#ARG LCL_SRC_DIR="text-generation-webui"
#COPY ${LCL_SRC_DIR} /src

#################################
# Copy source to app
RUN cp -ar /src /app
# Install oobabooga/text-generation-webui
RUN --mount=type=cache,target=/root/.cache/pip pip3 install -r /app/requirements.txt
# Install extensions
COPY ./tgwui_scripts/build_extensions.sh /tgwui_scripts/build_extensions.sh
RUN --mount=type=cache,target=/root/.cache/pip \
chmod +x /tgwui_scripts/build_extensions.sh && . /tgwui_scripts/build_extensions.sh
# Clone default GPTQ
#RUN git clone https://github.com/oobabooga/GPTQ-for-LLaMa.git -b cuda /app/repositories/GPTQ-for-LLaMa
# Build and install default GPTQ ('quant_cuda')
#ARG TORCH_CUDA_ARCH_LIST="6.1;7.0;7.5;8.0;8.6+PTX"
#RUN cd /app/repositories/GPTQ-for-LLaMa/ && python3 setup_cuda.py install

FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 AS base
# Runtime pre-reqs
RUN apt-get update && apt-get install --no-install-recommends -y \
python3-venv python3-dev git
# Copy app and src
COPY --from=app_base /app /app
COPY --from=app_base /src /src
# Copy and activate venv
COPY --from=app_base /venv /venv
ENV VIRTUAL_ENV=/venv
RUN python3 -m venv $VIRTUAL_ENV
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
# Finalise app setup
WORKDIR /app
EXPOSE 7860
EXPOSE 5000
EXPOSE 5005
EXPOSE 5001
# Required for Python print statements to appear in logs
ENV PYTHONUNBUFFERED=1
# Force variant layers to sync cache by setting --build-arg BUILD_DATE
ARG BUILD_DATE
ENV BUILD_DATE=$BUILD_DATE
RUN echo "$BUILD_DATE" > /build_date.txt
# Run
COPY ./tgwui_scripts/docker-entrypoint.sh /tgwui_scripts/docker-entrypoint.sh
RUN chmod +x /tgwui_scripts/docker-entrypoint.sh
ENTRYPOINT ["/tgwui_scripts/docker-entrypoint.sh"]


# VARIANT BUILDS
FROM base AS cuda
RUN echo "CUDA" >> /variant.txt
RUN apt-get install --no-install-recommends -y git python3-dev python3-pip
RUN rm -rf /app/repositories/GPTQ-for-LLaMa && \
git clone https://github.com/qwopqwop200/GPTQ-for-LLaMa -b cuda /app/repositories/GPTQ-for-LLaMa
RUN pip3 uninstall -y quant-cuda && \
sed -i 's/^safetensors==0\.3\.0$/safetensors/g' /app/repositories/GPTQ-for-LLaMa/requirements.txt && \
pip3 install -r /app/repositories/GPTQ-for-LLaMa/requirements.txt
ENV EXTRA_LAUNCH_ARGS=""
CMD ["python3", "/app/server.py"]

FROM base AS triton
RUN echo "TRITON" >> /variant.txt
RUN apt-get install --no-install-recommends -y git python3-dev build-essential python3-pip
RUN rm -rf /app/repositories/GPTQ-for-LLaMa && \
git clone https://github.com/qwopqwop200/GPTQ-for-LLaMa -b triton /app/repositories/GPTQ-for-LLaMa
RUN pip3 uninstall -y quant-cuda && \
sed -i 's/^safetensors==0\.3\.0$/safetensors/g' /app/repositories/GPTQ-for-LLaMa/requirements.txt && \
pip3 install -r /app/repositories/GPTQ-for-LLaMa/requirements.txt
ENV EXTRA_LAUNCH_ARGS=""
CMD ["python3", "/app/server.py"]

FROM base AS llama-cublas
RUN echo "LLAMA-CUBLAS" >> /variant.txt
RUN apt-get install --no-install-recommends -y git python3-dev build-essential python3-pip
ENV LLAMA_CUBLAS=1
RUN pip uninstall -y llama-cpp-python && \
CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python
ENV EXTRA_LAUNCH_ARGS=""
CMD ["python3", "/app/server.py"]

FROM base AS monkey-patch
RUN echo "4-BIT MONKEY-PATCH" >> /variant.txt
RUN apt-get install --no-install-recommends -y git python3-dev build-essential python3-pip
RUN git clone https://github.com/johnsmith0031/alpaca_lora_4bit /app/repositories/alpaca_lora_4bit && \
cd /app/repositories/alpaca_lora_4bit && git checkout 2f704b93c961bf202937b10aac9322b092afdce0
ARG TORCH_CUDA_ARCH_LIST="8.6"
RUN pip install git+https://github.com/sterlind/GPTQ-for-LLaMa.git@lora_4bit
ENV EXTRA_LAUNCH_ARGS=""
CMD ["python3", "/app/server.py", "--monkey-patch"]

FROM base AS default
RUN echo "DEFAULT" >> /variant.txt
ENV EXTRA_LAUNCH_ARGS=""
CMD ["python3", "/app/server.py"]
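
Note: the trailing stages (cuda, triton, llama-cublas, monkey-patch, default) are alternative build targets selected with --target. A sketch of building one variant manually (the image tag here is illustrative):

# Build the llama-cublas variant; BUILD_DATE invalidates the
# variant layers' cache so they are rebuilt.
docker build -f DockerfileTGWUI --target llama-cublas \
    --build-arg BUILD_DATE="$(date -u +%Y-%m-%d)" \
    -t text-generation-webui:llama-cublas .
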
46 changes: 43 additions & 3 deletions docker-compose.yaml
@@ -8,23 +8,25 @@ services:
ports:
- "8001:8001"
depends_on:
- tgwui
- super__redis
- super__postgres
networks:
- super_network

celery:
volumes:
- "./:/app"
build:
context: .
dockerfile: DockerfileCelery
depends_on:
- tgwui
- super__redis
- super__postgres
networks:
- super_network

gui:
build: ./gui
ports:
@@ -38,6 +40,44 @@ services:
- /app/node_modules
- /app/.next

tgwui:
build:
context: .
target: llama-cublas # Specify the variant to build
dockerfile: DockerfileTGWUI
args:
- LCL_SRC_DIR=text-generation-webui # Developers - see Dockerfile app_base
container_name: text-generation-webui
environment:
- EXTRA_LAUNCH_ARGS="--chat --listen --no-mmap --verbose --extensions openai --gpu-memory 22 22 --n-gpu-layers 100 --threads 8 --model 30b-Lazarus.ggmlv3.q8_0.bin --auto-devices" # Custom launch args (e.g., --model MODEL_NAME)
ports:
- 7860:7860 # Default web port
- 5000:5000 # Default API port
- 5005:5005 # Default streaming port
- 5001:5001 # Default OpenAI API extension port
volumes:
- ./tgwui_config/loras:/app/loras
- ./tgwui_config/models:/app/models
- ./tgwui_config/presets:/app/presets
- ./tgwui_config/prompts:/app/prompts
- ./tgwui_config/softprompts:/app/softprompts
- ./tgwui_config/training:/app/training
logging:
driver: json-file
options:
max-file: "3" # number of rotated log files to keep
max-size: '10m'
networks:
- super_network
deploy:
resources:
reservations:
devices:
- driver: nvidia
device_ids: ['0', '1'] # one entry per GPU
capabilities: [gpu]


super__redis:
image: "redis:latest"
networks:
@@ -61,4 +101,4 @@
driver: bridge

volumes:
-  superagi_postgres_data:
\ No newline at end of file
+  superagi_postgres_data:
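
Note: with the openai extension enabled via EXTRA_LAUNCH_ARGS, the tgwui service should expose an OpenAI-compatible API on port 5001. A minimal smoke test, assuming the extension's default /v1/completions route:

# Start the text-generation-webui service and issue a test completion.
docker compose up -d --build tgwui
curl -s http://localhost:5001/v1/completions \
    -H 'Content-Type: application/json' \
    -d '{"prompt": "Hello", "max_tokens": 16}'
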
64 changes: 64 additions & 0 deletions docker-compose.yaml.bak
@@ -0,0 +1,64 @@
version: '3.8'

services:
backend:
volumes:
- "./:/app"
build: .
ports:
- "8001:8001"
depends_on:
- super__redis
- super__postgres
networks:
- super_network

celery:
volumes:
- "./:/app"
build:
context: .
dockerfile: DockerfileCelery
depends_on:
- super__redis
- super__postgres
networks:
- super_network

gui:
build: ./gui
ports:
- "3000:3000"
environment:
- NEXT_PUBLIC_API_BASE_URL=http://localhost:8001
networks:
- super_network
volumes:
- ./gui:/app
- /app/node_modules
- /app/.next

super__redis:
image: "redis:latest"
networks:
- super_network

super__postgres:
image: "postgres:latest"
environment:
- POSTGRES_USER=superagi
- POSTGRES_PASSWORD=password
- POSTGRES_DB=super_agi_main
volumes:
- superagi_postgres_data:/var/lib/postgresql/data/
networks:
- super_network
ports:
- "5432:5432"

networks:
super_network:
driver: bridge

volumes:
superagi_postgres_data: