Skip to content

Commit

Permalink
ggml-backend v2 : add ggml_backend_sched (ggerganov#586)
Browse files: browse the repository at this point in the history
* ggml-backend-v2 wip

* fix metal build

* ggml-alloc : use a real backend buffer in measure mode

* backend sched : ignore view ops to reduce the number of splits

* dynamic ggml_cgraph wip

* dyn graphs : remove n_tasks from ggml_cplan

* dyn graphs : update ggml_graph_import

* reset hash table in ggml_build_forward

* ggml-alloc : split into tensor and graph allocators

* add ggml_backend_sched_set_node_backend

* remove ggml_build_forward_ctx, ggml_build_backward_ctx
add ggml_opt_params::graph_size
add ggml_new_graph_custom, ggml_graph_overhead_custom
add ggml_graph_clear

* update examples and tests, fix issues

* update more examples

* update gpt-2/main-backend.cpp from master

* ggml : fix compile warning

* ci : update yolo, fix mnist, use gpt-2-backend

* ggml : fix uninit warning

* ci : switch to gpt-2-backend2

ggml-ci

* metal : skip noops early to avoid warnings from ggml_metal_get_buffer

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
  • Loading branch information
slaren and ggerganov authored Oct 30, 2023
1 parent 05ff36f commit 08d748b
Show file tree
Hide file tree
Showing 42 changed files with 5,066 additions and 1,095 deletions.
4 changes: 2 additions & 2 deletions ci/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -145,8 +145,8 @@ function gg_run_gpt_2 {
model="../models-mnt/gpt-2/ggml-model-gpt-2-117M.bin"
prompts="../examples/prompts/gpt-2.txt"

(time ./bin/gpt-2 --model ${model} -s 1234 -n 64 -tt ${prompts} ) 2>&1 | tee -a $OUT/${ci}-tg.log
(time ./bin/gpt-2 --model ${model} -s 1234 -n 64 -p "I believe the meaning of life is") 2>&1 | tee -a $OUT/${ci}-tg.log
(time ./bin/gpt-2-backend2 --model ${model} -s 1234 -n 64 -tt ${prompts} ) 2>&1 | tee -a $OUT/${ci}-tg.log
(time ./bin/gpt-2-backend2 --model ${model} -s 1234 -n 64 -p "I believe the meaning of life is") 2>&1 | tee -a $OUT/${ci}-tg.log

(time ./bin/gpt-2-batched --model ${model} -s 1234 -n 64 -np 8 -p "I believe the meaning of life is") 2>&1 | tee -a $OUT/${ci}-tg.log

Expand Down
10 changes: 5 additions & 5 deletions examples/dolly-v2/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -497,7 +497,7 @@ bool dollyv2_eval(
};

struct ggml_context * ctx0 = ggml_init(params);
struct ggml_cgraph gf = { };
struct ggml_cgraph * gf = ggml_new_graph(ctx0);

// KQ_pos - contains the positions
struct ggml_tensor * KQ_pos = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N);
Expand Down Expand Up @@ -555,8 +555,8 @@ bool dollyv2_eval(
( n_ctx)*ggml_element_size(model.memory_v),
(il*n_ctx)*ggml_element_size(model.memory_v)*n_embd + n_past*ggml_element_size(model.memory_v));

ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Kcur, k));
ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Vcur, v));
ggml_build_forward_expand(gf, ggml_cpy(ctx0, Kcur, k));
ggml_build_forward_expand(gf, ggml_cpy(ctx0, Vcur, v));
}

// Q = Qcur.contiguous().view(n_embd/n_head, n_head, N).permute(0, 2, 1, 3)
Expand Down Expand Up @@ -666,8 +666,8 @@ bool dollyv2_eval(
//inpL = ggml_soft_max_inplace(ctx0, inpL);

// run the computation
ggml_build_forward_expand(&gf, inpL);
ggml_graph_compute_with_ctx(ctx0, &gf, n_threads);
ggml_build_forward_expand(gf, inpL);
ggml_graph_compute_with_ctx(ctx0, gf, n_threads);

//if (n_past%100 == 0) {
// ggml_graph_print (&gf);
Expand Down
14 changes: 13 additions & 1 deletion examples/gpt-2/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,19 @@
#
# gpt-2

set(TEST_TARGET gpt-2)
set(TEST_TARGET gpt-2-ctx)
add_executable(${TEST_TARGET} main-ctx.cpp)
target_link_libraries(${TEST_TARGET} PRIVATE ggml common common-ggml)

set(TEST_TARGET gpt-2-alloc)
add_executable(${TEST_TARGET} main-alloc.cpp)
target_link_libraries(${TEST_TARGET} PRIVATE ggml common common-ggml)

set(TEST_TARGET gpt-2-backend)
add_executable(${TEST_TARGET} main-backend.cpp)
target_link_libraries(${TEST_TARGET} PRIVATE ggml common common-ggml)

set(TEST_TARGET gpt-2-backend2)
add_executable(${TEST_TARGET} main.cpp)
target_link_libraries(${TEST_TARGET} PRIVATE ggml common common-ggml)

Expand Down
Loading

0 comments on commit 08d748b

Please sign in to comment.