Skip to content

Commit

Permalink
sync : llama.cpp (CUDA opts, ggml-quants, YARN, etc.) (ggerganov#601)
Browse files Browse the repository at this point in the history
ggml-ci
  • Loading branch information
ggerganov authored Nov 3, 2023
1 parent d271d35 commit ff90433
Show file tree
Hide file tree
Showing 11 changed files with 9,315 additions and 3,564 deletions.
27 changes: 24 additions & 3 deletions include/ggml/ggml.h
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@
#define GGML_MAX_CONTEXTS 64
#define GGML_MAX_SRC 6
#define GGML_MAX_NAME 64
#define GGML_MAX_OP_PARAMS 32
#define GGML_MAX_OP_PARAMS 64
#define GGML_DEFAULT_N_THREADS 4
#define GGML_DEFAULT_GRAPH_SIZE 2048
#if UINTPTR_MAX == 0xFFFFFFFF
Expand Down Expand Up @@ -1330,8 +1330,13 @@ extern "C" {
int n_dims,
int mode,
int n_ctx,
int n_orig_ctx,
float freq_base,
float freq_scale);
float freq_scale,
float ext_factor,
float attn_factor,
float beta_fast,
float beta_slow);

// in-place, returns view(a)
GGML_API struct ggml_tensor * ggml_rope_custom_inplace(
Expand All @@ -1341,8 +1346,17 @@ extern "C" {
int n_dims,
int mode,
int n_ctx,
int n_orig_ctx,
float freq_base,
float freq_scale);
float freq_scale,
float ext_factor,
float attn_factor,
float beta_fast,
float beta_slow);

// compute correction dims for YaRN RoPE scaling
// Returns void; presumably the two computed values are written to dims[0] and dims[1]
// (dims is the only non-scalar parameter) - TODO confirm against the definition.
// NOTE(review): declared without GGML_API, unlike the neighbouring ggml_rope_*_inplace
//               declarations - confirm whether this symbol is meant to be exported
//               when ggml is built as a shared library.
void ggml_rope_yarn_corr_dims(
int n_dims, int n_orig_ctx, float freq_base, float beta_fast, float beta_slow, float dims[2]);

// xPos RoPE, in-place, returns view(a)
GGML_API struct ggml_tensor * ggml_rope_xpos_inplace(
Expand Down Expand Up @@ -1941,12 +1955,19 @@ extern "C" {
// quantization
//

// TODO: these would probably get removed in favor of the more general ggml_quantize_chunk
// Per-format quantization entry points. All share one signature:
//   src  - float input, read-only
//   dst  - untyped output buffer
//   hist - writable int64_t array
// NOTE(review): the exact meaning of n vs. k and of the size_t return value is not
//               visible in this header - presumably element counts and a byte size;
//               confirm against the definitions.
GGML_API size_t ggml_quantize_q4_0(const float * src, void * dst, int n, int k, int64_t * hist);
GGML_API size_t ggml_quantize_q4_1(const float * src, void * dst, int n, int k, int64_t * hist);
GGML_API size_t ggml_quantize_q5_0(const float * src, void * dst, int n, int k, int64_t * hist);
GGML_API size_t ggml_quantize_q5_1(const float * src, void * dst, int n, int k, int64_t * hist);
GGML_API size_t ggml_quantize_q8_0(const float * src, void * dst, int n, int k, int64_t * hist);

// *_K formats (q2_K .. q6_K); same signature as the functions above
GGML_API size_t ggml_quantize_q2_K(const float * src, void * dst, int n, int k, int64_t * hist);
GGML_API size_t ggml_quantize_q3_K(const float * src, void * dst, int n, int k, int64_t * hist);
GGML_API size_t ggml_quantize_q4_K(const float * src, void * dst, int n, int k, int64_t * hist);
GGML_API size_t ggml_quantize_q5_K(const float * src, void * dst, int n, int k, int64_t * hist);
GGML_API size_t ggml_quantize_q6_K(const float * src, void * dst, int n, int k, int64_t * hist);

// General entry point: the format is selected by `type`, and the call takes
// `start` and `n` in place of the `n`/`k` pair used by the per-format functions.
GGML_API size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, int start, int n, int64_t * hist);

//
Expand Down
32 changes: 19 additions & 13 deletions scripts/sync-llama.sh
Original file line number Diff line number Diff line change
@@ -1,18 +1,24 @@
#!/bin/bash

cp -rpv ../llama.cpp/ggml.c src/ggml.c
cp -rpv ../llama.cpp/ggml-alloc.c src/ggml-alloc.c
cp -rpv ../llama.cpp/ggml-backend.c src/ggml-backend.c
cp -rpv ../llama.cpp/ggml-cuda.h src/ggml-cuda.h
cp -rpv ../llama.cpp/ggml-cuda.cu src/ggml-cuda.cu
cp -rpv ../llama.cpp/ggml-opencl.h src/ggml-opencl.h
cp -rpv ../llama.cpp/ggml-opencl.cpp src/ggml-opencl.cpp
cp -rpv ../llama.cpp/ggml-metal.h src/ggml-metal.h
cp -rpv ../llama.cpp/ggml-metal.m src/ggml-metal.m
cp -rpv ../llama.cpp/ggml-metal.metal src/ggml-metal.metal
cp -rpv ../llama.cpp/ggml.h include/ggml/ggml.h
cp -rpv ../llama.cpp/ggml-alloc.h include/ggml/ggml-alloc.h
cp -rpv ../llama.cpp/ggml-backend.h include/ggml/ggml-backend.h
cp -rpv ../llama.cpp/ggml.c src/ggml.c
cp -rpv ../llama.cpp/ggml-alloc.c src/ggml-alloc.c
cp -rpv ../llama.cpp/ggml-backend-impl.c src/ggml-backend-impl.c
cp -rpv ../llama.cpp/ggml-backend.c src/ggml-backend.c
cp -rpv ../llama.cpp/ggml-cuda.cu src/ggml-cuda.cu
cp -rpv ../llama.cpp/ggml-cuda.h src/ggml-cuda.h
cp -rpv ../llama.cpp/ggml-impl.h src/ggml-impl.h
cp -rpv ../llama.cpp/ggml-metal.h src/ggml-metal.h
cp -rpv ../llama.cpp/ggml-metal.m src/ggml-metal.m
cp -rpv ../llama.cpp/ggml-metal.metal src/ggml-metal.metal
#cp -rpv ../llama.cpp/ggml-mpi.h src/ggml-mpi.h
#cp -rpv ../llama.cpp/ggml-mpi.c src/ggml-mpi.c
cp -rpv ../llama.cpp/ggml-opencl.cpp src/ggml-opencl.cpp
cp -rpv ../llama.cpp/ggml-opencl.h src/ggml-opencl.h
cp -rpv ../llama.cpp/ggml-quants.c src/ggml-quants.c
cp -rpv ../llama.cpp/ggml-quants.h src/ggml-quants.h
cp -rpv ../llama.cpp/ggml.h include/ggml/ggml.h
cp -rpv ../llama.cpp/ggml-alloc.h include/ggml/ggml-alloc.h
cp -rpv ../llama.cpp/ggml-backend.h include/ggml/ggml-backend.h

cp -rpv ../llama.cpp/tests/test-opt.cpp tests/test-opt.cpp
cp -rpv ../llama.cpp/tests/test-grad0.cpp tests/test-grad0.cpp
Expand Down
1 change: 1 addition & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,7 @@ add_library(${TARGET}
ggml.c
ggml-alloc.c
ggml-backend.c
ggml-quants.c
ggml-impl.h
ggml-backend-impl.h
../include/ggml/ggml.h
Expand Down
Loading

0 comments on commit ff90433

Please sign in to comment.