Commit

Merge remote-tracking branch 'upstream/main' into feat/cuda-opencl-acceleration

LLukas22 committed Jun 30, 2023
2 parents a1f61b4 + 9a22269 commit b1e8cd0
Showing 40 changed files with 1,581 additions and 989 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/rust.yml
@@ -27,7 +27,7 @@ jobs:
       - name: Build
         run: cargo build --verbose
       - name: Run tests
-        run: cargo test --verbose
+        run: cargo test --all --verbose
   fmt:
     name: Clippy, formatting and docs
     runs-on: ubuntu-latest
5 changes: 5 additions & 0 deletions .rusty-hook.toml
@@ -0,0 +1,5 @@
+[hooks]
+pre-commit = "cargo run -p precommit-check"
+
+[logging]
+verbose = true
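
The hook runs a `precommit-check` package from the workspace, whose source is not part of this diff. A minimal sketch of what such a binary might look like, assuming it only chains the usual formatting and lint checks (the exact checks are an assumption):

```rust
// Hypothetical sketch: the real `precommit-check` crate is not shown in this
// diff; this assumes it should fail the commit if fmt or clippy fail.
use std::process::{exit, Command};

fn check(args: &[&str]) -> bool {
    // Run a cargo subcommand and report whether it exited successfully.
    Command::new("cargo")
        .args(args)
        .status()
        .map(|status| status.success())
        .unwrap_or(false)
}

fn main() {
    let ok = check(&["fmt", "--all", "--check"])
        && check(&["clippy", "--all-targets", "--", "-D", "warnings"]);
    if !ok {
        // A non-zero exit code makes rusty-hook abort the commit.
        exit(1);
    }
}
```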
82 changes: 74 additions & 8 deletions Cargo.lock

Some generated files are not rendered by default.

10 changes: 6 additions & 4 deletions README.md
@@ -52,6 +52,8 @@ Currently, the following models are supported:
   [Wizard](https://github.com/nlpxucan/WizardLM))
 - [MPT](https://www.mosaicml.com/blog/mpt-7b)
 
+See [getting models](#getting-models) for more information on how to download supported models.
+
 ## Using `llm` in a Rust Project
 
 This project depends on Rust v1.65.0 or above and a modern C toolchain.
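
For context on this part of the README: pulling `llm` into a downstream project is an ordinary Cargo dependency. A sketch, assuming the crate is consumed straight from the repository (the URL and branch are assumptions, not taken from this diff):

```toml
# Illustrative only: pin the source that matches your setup.
[dependencies]
llm = { git = "https://github.com/rustformers/llm", branch = "main" }
```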
@@ -86,7 +88,7 @@ opt-level = 3
 ```
 ## Leverage Accelerators with `llm`
 
-The `llm` library is engineered to take advantage of hardware accelerators such as `cuda` and `metal` for optimized performance. 
+The `llm` library is engineered to take advantage of hardware accelerators such as `cuda` and `metal` for optimized performance.
 
 To enable `llm` to harness these accelerators, some preliminary configuration steps are necessary, which vary based on your operating system. For comprehensive guidance, please refer to the [Acceleration Support for Building section](doc/CONTRIBUTING.md#acceleration-support-for-building) in our documentation.
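
Once the platform-specific setup from CONTRIBUTING.md is in place, turning an accelerator on comes down to building with one of the feature flags this commit adds to the CLI crate (see the `binaries/llm-cli/Cargo.toml` hunk below), for example:

```sh
# Assumes a working CUDA toolchain; swap in `clblast` or `metal` as appropriate.
cargo build --release --features cublas
```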

Expand Down Expand Up @@ -173,10 +175,10 @@ llm gptneox infer -m RedPajama-INCITE-Base-3B-v1-q4_0.bin -p "Rust is a cool pro
In the example above, the first two arguments specify the model architecture and
command, respectively. The required `-m` argument specifies the local path to
the model, and the required `-p` argument specifies the evaluation prompt. The
optional `-r` argument is used to load the model's vocabulary from a remote
optional `-r` argument is used to load the model's tokenizer from a remote
Hugging Face 🤗 repository, which will typically improve results when compared
to loading the vocabulary from the model file itself; there is also an optional
`-v` argument that can be used to specify the path to a local vocabulary file.
to loading the tokenizer from the model file itself; there is also an optional
`-v` argument that can be used to specify the path to a local tokenizer file.
For more information about the `llm` CLI, use the `--help` parameter.

There is also a [simple inference example](./crates/llm/examples/inference.rs)
Expand Down
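
Concretely, the `-r` variant of the example above might look like the following; the repository name is an assumption for illustration, and any Hugging Face repository that ships the matching tokenizer should work:

```sh
# Hypothetical tokenizer repository; substitute the one matching your model.
llm gptneox infer -m RedPajama-INCITE-Base-3B-v1-q4_0.bin \
  -r togethercomputer/RedPajama-INCITE-Base-3B-v1 \
  -p "Rust is a cool programming language because"
```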
6 changes: 6 additions & 0 deletions binaries/llm-cli/Cargo.toml
@@ -28,7 +28,13 @@ num_cpus = "1.15.0"
 color-eyre = { version = "0.6.2", default-features = false }
 zstd = { version = "0.12", default-features = false }
 
+[dev-dependencies]
+rusty-hook = "^0.11.2"
+
 [features]
+cublas = ["llm/cublas"]
+clblast = ["llm/clblast"]
+metal = ["llm/metal"]
 
 # Falcon is off by default. See `llm_falcon`'s module documentation for more information.
 falcon = ["llm/falcon"]