Skip to content

Commit

Permalink
update llama.cpp + peg CUDA architectures
Browse the repository at this point in the history
  • Loading branch information
LLukas22 committed Jul 17, 2023
1 parent 4254a69 commit a516b49
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 1 deletion.
5 changes: 5 additions & 0 deletions crates/ggml/sys/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -330,13 +330,16 @@ fn enable_cublas(build: &mut cc::Build, out_dir: &Path) {
.arg("--compile")
.arg("-cudart")
.arg("static")
.arg("--generate-code=arch=compute_52,code=[compute_52,sm_52]")
.arg("--generate-code=arch=compute_61,code=[compute_61,sm_61]")
.arg("-D_WINDOWS")
.arg("-DNDEBUG")
.arg("-DGGML_USE_CUBLAS")
.arg("-D_CRT_SECURE_NO_WARNINGS")
.arg("-D_MBCS")
.arg("-DWIN32")
.arg(r"-Illama-cpp\include\ggml")
.arg(r"-Illama-cpp\include\ggml")
.arg(r"llama-cpp\ggml-cuda.cu")
.status()
.unwrap_or_else(|_| panic!("{}", get_error_message()));
Expand All @@ -358,6 +361,8 @@ fn enable_cublas(build: &mut cc::Build, out_dir: &Path) {
.arg("-Illama-cpp/include/ggml")
.arg("-mtune=native")
.arg("-pthread")
.arg("--generate-code=arch=compute_52,code=[compute_52,sm_52]")
.arg("--generate-code=arch=compute_61,code=[compute_61,sm_61]")
.arg("-DGGML_USE_CUBLAS")
.arg("-I/usr/local/cuda/include")
.arg("-I/opt/cuda/include")
Expand Down
12 changes: 12 additions & 0 deletions crates/ggml/sys/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1498,6 +1498,18 @@ extern "C" {
n_ctx: ::std::os::raw::c_int,
) -> *mut ggml_tensor;
}
extern "C" {
// FFI binding (bindgen-style) to the ggml C function `ggml_rope_custom_inplace`.
// Operates in place on tensor `a` and returns a `*mut ggml_tensor` (per the C API
// shape, presumably the same tensor viewed as the result — TODO confirm against
// the ggml header this was generated from).
//
// Parameters (names mirror the C declaration):
//   ctx        - ggml context the operation is recorded in
//   a          - tensor to apply rotary position embedding (RoPE) to, in place
//   n_past     - NOTE(review): likely the number of already-processed positions;
//                verify against llama.cpp usage
//   n_dims     - number of dimensions the rotation is applied over
//   mode       - RoPE mode flags (semantics defined by ggml; not visible here)
//   freq_base  - base frequency for the rotary embedding ("custom" knob vs.
//                plain ggml_rope_inplace)
//   freq_scale - frequency scaling factor (the other "custom" knob)
//   n_ctx      - context length parameter
//
// Safety: caller must uphold the C API's contract — all pointers must be valid,
// non-dangling ggml objects owned by `ctx` for the duration of the call.
pub fn ggml_rope_custom_inplace(
ctx: *mut ggml_context,
a: *mut ggml_tensor,
n_past: ::std::os::raw::c_int,
n_dims: ::std::os::raw::c_int,
mode: ::std::os::raw::c_int,
freq_base: f32,
freq_scale: f32,
n_ctx: ::std::os::raw::c_int,
) -> *mut ggml_tensor;
}
extern "C" {
pub fn ggml_rope_back(
ctx: *mut ggml_context,
Expand Down

0 comments on commit a516b49

Please sign in to comment.