Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main' into feat/cuda-opencl-acceleration
Browse files Browse the repository at this point in the history
  • Loading branch information
LLukas22 committed Jul 7, 2023
2 parents 174b4e9 + 7d6eee3 commit 2a8f62a
Show file tree
Hide file tree
Showing 39 changed files with 1,123 additions and 150 deletions.
1 change: 1 addition & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
utils/prompts/*.txt text eol=lf
43 changes: 43 additions & 0 deletions .github/workflows/integration_tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
name: Integration Tests

permissions:
  contents: write

# Run on pushes/PRs targeting main, and allow manual runs from the Actions tab.
on:
  push:
    branches: ["main"]
  pull_request:
    branches: ["main"]
  workflow_dispatch:

env:
  CARGO_TERM_COLOR: always

jobs:
  test:
    strategy:
      # Don't stop testing if an architecture fails
      fail-fast: false
      matrix:
        model: [llama, gptneox, gptj, mpt, bloom]
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
        with:
          submodules: recursive
      - uses: dtolnay/rust-toolchain@stable
      - name: Install dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y \
            libssl-dev \
            pkg-config \
            zlib1g-dev
      - name: Run Integration Tests for ${{ matrix.model }}
        run: cargo run --release -p llm-test ${{ matrix.model }}
      # Upload test results
      - uses: actions/upload-artifact@v3
        if: always()
        with:
          name: test-reports
          path: ./.tests/results/*.json
71 changes: 70 additions & 1 deletion .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,77 @@ jobs:
- name: Formatting
run: cargo fmt --all -- --check
- name: Clippy
run: cargo clippy -- -Dclippy::all
run: cargo clippy --workspace -- -Dclippy::all
- name: Documentation
env:
RUSTDOCFLAGS: -Dwarnings
run: cargo doc --workspace --exclude llm-cli

metal:
  name: Build with metal support
  runs-on: macos-latest
  steps:
    - uses: actions/checkout@v3
      with:
        submodules: recursive
    - uses: dtolnay/rust-toolchain@stable
    - name: Check
      run: cargo check --verbose
    - name: Build
      run: cargo build --verbose --features metal

# cuda:
#   name: Build with cuda support
#   strategy:
#     # Don't stop building if it fails on an OS
#     fail-fast: false
#     matrix:
#       os: [windows-latest, ubuntu-latest]
#   runs-on: ${{ matrix.os }}
#   steps:
#     - uses: actions/checkout@v3
#       with:
#         submodules: recursive
#     - uses: Jimver/cuda-toolkit@v0.2.10
#       if: matrix.os == 'ubuntu-latest'
#       id: cuda-toolkit-linux
#       with:
#         cuda: '12.1.0'
#         method: 'network'
#         # See e.g. https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/
#         sub-packages: '["nvcc","compiler","libraries","libraries-dev","cudart","cudart-dev","libcublas","libcublas-dev"]'
#     - uses: Jimver/cuda-toolkit@v0.2.10
#       if: matrix.os == 'windows-latest'
#       id: cuda-toolkit-windows
#       with:
#         cuda: '12.1.0'
#         method: 'network'
#         # See https://docs.nvidia.com/cuda/cuda-installation-guide-microsoft-windows/index.html#install-the-cuda-software
#         sub-packages: '["nvcc","cudart","visual_studio_integration","cublas_dev","cublas"]'
#     - uses: dtolnay/rust-toolchain@stable
#     - name: Check
#       run: cargo check --verbose
#     - name: Build
#       run: cargo build --verbose --features cublas

opencl:
  name: Build with opencl support
  strategy:
    # Don't stop building if it fails on an OS
    fail-fast: false
    matrix:
      # TODO Add windows opencl build
      os: [ubuntu-latest]
  runs-on: ${{ matrix.os }}
  steps:
    - uses: actions/checkout@v3
      with:
        submodules: recursive
    - name: Install clblast
      if: matrix.os == 'ubuntu-latest'
      run: sudo apt install libclblast-dev
    - uses: dtolnay/rust-toolchain@stable
    - name: Check
      run: cargo check --verbose
    - name: Build
      run: cargo build --verbose --features clblast
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
/target
/models
.DS_Store
.DS_Store
/.tests
74 changes: 74 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ license = "MIT OR Apache-2.0"
[workspace.dependencies]
bytemuck = "1.13.1"
bytesize = "1.1"
env_logger = "0.10.0"
log = "0.4"
rand = "0.8.5"
thiserror = "1.0"
Expand Down Expand Up @@ -47,3 +48,6 @@ targets = ["x86_64-unknown-linux-gnu", "x86_64-apple-darwin", "x86_64-pc-windows
[profile.dist]
inherits = "release"
lto = "thin"

[workspace.metadata.release]
tag-prefix = ""
20 changes: 17 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,8 @@ To make use of the features on the `main` branch, clone the repository and then
build it with

```shell
git clone --recurse-submodules git@github.com:rustformers/llm.git
git clone --recurse-submodules https://github.com/rustformers/llm
cd llm
cargo build --release
```

Expand All @@ -161,8 +162,21 @@ To enable hardware acceleration, see [Acceleration Support for Building section]

## Getting Models

GGML files are easy to acquire. For a list of models that have been tested, see
the [known-good models](./doc/known-good-models.md).
GGML models are easy to acquire. They are primarily located on Hugging Face
(see [From Hugging Face](#from-hugging-face)), but can be obtained from elsewhere.

Models are distributed as single files, and do not need any additional files to
be downloaded. However, they are quantized with different levels of precision,
so you will need to choose a quantization level that is appropriate for your
application.

Additionally, we support Hugging Face tokenizers to improve the quality of
tokenization. These are separate files (`tokenizer.json`) that can be used
with the CLI using the `-v` or `-r` flags, or with the `llm` crate by
using the appropriate `TokenizerSource` enum variant.

For a list of models that have been tested, see the
[known-good models](./doc/known-good-models.md).

Certain older GGML formats are not supported by this project, but the goal is to
maintain feature parity with the upstream GGML project. For problems relating to
Expand Down
4 changes: 2 additions & 2 deletions binaries/generate-ggml-bindings/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ fn generate_extra(

builder
.generate()
.expect(&format!("Unable to generate {name} bindings"))
.unwrap_or_else(|_| panic!("Unable to generate {name} bindings"))
.write_to_file(src_path.join(format!("{name}.rs")))
.expect(&format!("Couldn't write {name} bindings"));
.unwrap_or_else(|_| panic!("Couldn't write {name} bindings"));
}
2 changes: 1 addition & 1 deletion binaries/llm-cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,14 @@ path = "src/main.rs"
llm = { path = "../../crates/llm", version = "0.2.0-dev", default-features = false, features = ["models"] }

bytesize = { workspace = true }
env_logger = { workspace = true }
log = { workspace = true }
rand = { workspace = true }
rustyline = { workspace = true }
spinoff = { workspace = true }
clap = { workspace = true }

bincode = "1.3.3"
env_logger = "0.10.0"
num_cpus = "1.15.0"

color-eyre = { version = "0.6.2", default-features = false }
Expand Down
Loading

0 comments on commit 2a8f62a

Please sign in to comment.