From a516b4969bf2dac8d424a70ba5c858d746768ada Mon Sep 17 00:00:00 2001 From: Lukas Kreussel <65088241+LLukas22@users.noreply.github.com> Date: Mon, 17 Jul 2023 11:41:54 +0200 Subject: [PATCH] update `llama.cpp` + peg cuda architecture --- crates/ggml/sys/build.rs | 4 ++++ crates/ggml/sys/llama-cpp | 2 +- crates/ggml/sys/src/lib.rs | 12 ++++++++++++ 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/crates/ggml/sys/build.rs b/crates/ggml/sys/build.rs index 5a7ecfb5..3a6e8415 100644 --- a/crates/ggml/sys/build.rs +++ b/crates/ggml/sys/build.rs @@ -330,6 +330,8 @@ fn enable_cublas(build: &mut cc::Build, out_dir: &Path) { .arg("--compile") .arg("-cudart") .arg("static") + .arg("--generate-code=arch=compute_52,code=[compute_52,sm_52]") + .arg("--generate-code=arch=compute_61,code=[compute_61,sm_61]") .arg("-D_WINDOWS") .arg("-DNDEBUG") .arg("-DGGML_USE_CUBLAS") @@ -358,6 +360,8 @@ fn enable_cublas(build: &mut cc::Build, out_dir: &Path) { .arg("-Illama-cpp/include/ggml") .arg("-mtune=native") .arg("-pthread") + .arg("--generate-code=arch=compute_52,code=[compute_52,sm_52]") + .arg("--generate-code=arch=compute_61,code=[compute_61,sm_61]") .arg("-DGGML_USE_CUBLAS") .arg("-I/usr/local/cuda/include") .arg("-I/opt/cuda/include") diff --git a/crates/ggml/sys/llama-cpp b/crates/ggml/sys/llama-cpp index 32c54116..b7647436 160000 --- a/crates/ggml/sys/llama-cpp +++ b/crates/ggml/sys/llama-cpp @@ -1 +1 @@ -Subproject commit 32c54116318929c90fd7ae814cf9b5232cd44c36 +Subproject commit b7647436ccc80970b44a270f70f4f2ea139054d1 diff --git a/crates/ggml/sys/src/lib.rs b/crates/ggml/sys/src/lib.rs index f3d9192d..04a999df 100644 --- a/crates/ggml/sys/src/lib.rs +++ b/crates/ggml/sys/src/lib.rs @@ 
-1498,6 +1498,18 @@ extern "C" { n_ctx: ::std::os::raw::c_int, ) -> *mut ggml_tensor; } +extern "C" { + pub fn ggml_rope_custom_inplace( + ctx: *mut ggml_context, + a: *mut ggml_tensor, + n_past: ::std::os::raw::c_int, + n_dims: ::std::os::raw::c_int, + mode: ::std::os::raw::c_int, + freq_base: f32, + freq_scale: f32, + n_ctx: ::std::os::raw::c_int, + ) -> *mut ggml_tensor; +} extern "C" { pub fn ggml_rope_back( ctx: *mut ggml_context,