Support RedPajama-Chat-3B-V1 (#99)
jinhongyii authored May 18, 2023
1 parent cfd78db commit fb6d0e8
Showing 10 changed files with 201 additions and 120 deletions.
6 changes: 3 additions & 3 deletions .gitmodules
@@ -1,6 +1,6 @@
-[submodule "3rdparty/sentencepiece-js"]
-	path = 3rdparty/sentencepiece-js
-	url = https://github.com/tqchen/sentencepiece-js
[submodule "mlc-llm"]
	path = mlc-llm
	url = https://github.com/mlc-ai/mlc-llm
+[submodule "3rdparty/tokenizers-cpp"]
+	path = 3rdparty/tokenizers-cpp
+	url = https://github.com/mlc-ai/tokenizers-cpp
1 change: 0 additions & 1 deletion 3rdparty/sentencepiece-js
Submodule sentencepiece-js deleted from 9c3168
1 change: 1 addition & 0 deletions 3rdparty/tokenizers-cpp
Submodule tokenizers-cpp added at a3fd73
2 changes: 1 addition & 1 deletion mlc-llm
Submodule mlc-llm updated 51 files
+0 −1 .gitignore
+3 −3 .gitmodules
+0 −1 3rdparty/sentencepiece-js
+1 −0 3rdparty/tokenizers-cpp
+0 −81 3rdparty/tokenizers-cpp/CMakeLists.txt
+0 −13 3rdparty/tokenizers-cpp/Cargo.toml
+0 −187 3rdparty/tokenizers-cpp/src/lib.rs
+0 −101 3rdparty/tokenizers-cpp/tokenizers.h
+13 −27 CMakeLists.txt
+47 −1 README.md
+2 −6 android/MLCChat/app/src/main/java/ai/mlc/mlcchat/LLMChat.java
+8 −1 android/MLCChat/app/src/main/jni/Android.mk
+17 −22 android/README.md
+1 −1 android/prepare_libs.sh
+161 −54 build.py
+30 −0 cpp/README.md
+113 −97 cpp/cli_main.cc
+442 −335 cpp/llm_chat.cc
+6 −4 cpp/llm_chat.h
+5 −3 ios/MLCChat.xcodeproj/project.pbxproj
+7 −14 ios/MLCChat/LLMChat.mm
+143 −48 ios/README.md
+8 −7 ios/prepare_libs.sh
+9 −5 ios/prepare_params.sh
+0 −36 log_db/RedPajama-INCITE-Chat-3B-v1/database_tuning_record.json
+35 −0 log_db/redpajama-3b-q4f16/database_tuning_record.json
+30 −31 log_db/redpajama-3b-q4f16/database_workload.json
+30 −0 log_db/redpajama-3b-q4f32/database_tuning_record.json
+30 −0 log_db/redpajama-3b-q4f32/database_workload.json
+53 −16 mlc_llm/conversation.py
+7 −1 mlc_llm/dispatch/gpt_neox/__init__.py
+2 −2 mlc_llm/dispatch/gpt_neox/dolly_v2_3b.py
+972 −0 mlc_llm/dispatch/gpt_neox/redpajama_incite_chat_3b_v1.py
+722 −0 mlc_llm/dispatch/gpt_neox/redpajama_incite_chat_3b_v1_mod.py
+1,010 −0 mlc_llm/dispatch/gpt_neox/redpajama_incite_chat_3b_v1_tune.py
+840 −0 mlc_llm/dispatch/gpt_neox/redpajama_q4f32.py
+577 −0 mlc_llm/dispatch/gpt_neox/redpajama_q4f32_mod.py
+743 −0 mlc_llm/dispatch/gpt_neox/redpajama_q4f32_tune.py
+23 −0 mlc_llm/relax_model/commons.py
+60 −44 mlc_llm/relax_model/gpt_neox.py
+46 −22 mlc_llm/relax_model/llama.py
+25 −5 mlc_llm/relax_model/modules.py
+103 −84 mlc_llm/relax_model/moss.py
+18 −8 mlc_llm/transform/decode_matmul_ewise.py
+5 −1 mlc_llm/transform/quantization.py
+18 −8 mlc_llm/transform/transpose_matmul.py
+229 −36 mlc_llm/tuning.py
+24 −30 mlc_llm/utils.py
+7 −3 tests/chat.py
+5 −3 tests/debug/compare_lib.py
+3 −2 tests/evaluate.py
2 changes: 1 addition & 1 deletion scripts/build_site.sh
@@ -23,7 +23,7 @@ cp web/llm_chat.css site/dist/

cp dist/tvmjs_runtime.wasi.js site/dist
cp dist/tvmjs.bundle.js site/dist
-cp -r dist/sentencepiece site/dist
+cp -r dist/tokenizers-cpp site/dist

if [ -d "$MLC_LLM_HOME/dist/vicuna-v1-7b-q4f32_0/params" ]; then
  mkdir -p site/dist/vicuna-v1-7b-q4f32_0
8 changes: 4 additions & 4 deletions scripts/prep_deps.sh
@@ -15,15 +15,15 @@ if [[ -z ${TVM_HOME_SET} ]]; then
    export TVM_HOME="${TVM_HOME:-3rdparty/tvm}"
fi

-export SENTENCEPIECE_JS_HOME="3rdparty/sentencepiece-js"
+export TOKENIZERS_CPP_HOME="3rdparty/tokenizers-cpp/web"

mkdir -p dist
cd ${TVM_HOME}/web && make && npm install && npm run bundle && cd -
git submodule update --init --recursive
-cd ${SENTENCEPIECE_JS_HOME} && npm install && npm run build && cd -
+cd ${TOKENIZERS_CPP_HOME} && npm install && npm run build && cd -
git submodule update --init --recursive
-rm -rf dist/sentencepiece
-cp -r ${SENTENCEPIECE_JS_HOME}/dist dist/sentencepiece
+rm -rf dist/tokenizers-cpp
+cp -r ${TOKENIZERS_CPP_HOME}/dist dist/tokenizers-cpp

echo "Exporting tvmjs runtime dist files"
python -c "from tvm.contrib import tvmjs; tvmjs.export_runtime(\"dist\")"
32 changes: 20 additions & 12 deletions site/_includes/llm_chat.html
@@ -3,31 +3,39 @@
</script>

<script type="module">
-async function getTokenizer(url) {
-  const mod = await import("./dist/sentencepiece/index.js");
-  return await mod.sentencePieceProcessor(url);
+import { Tokenizer } from './dist/tokenizers-cpp/index.js';
+
+async function tokenizerFromJSON(jsonArrayBuffer) {
+  return await Tokenizer.fromJSON(jsonArrayBuffer);
}
+async function tokenizerFromSentencePiece(modelBuffer) {
+  return await Tokenizer.fromSentencePiece(modelBuffer);
+}
-tvmjsGlobalEnv.sentencePieceProcessor = getTokenizer;
+tvmjsGlobalEnv.tokenizerFromJSON = tokenizerFromJSON;
+tvmjsGlobalEnv.tokenizerFromSentencePiece = tokenizerFromSentencePiece;

</script>
<script>
function handleChatUIInputEnter(event) {
  if (event.keyCode === 13) {
    tvmjsGlobalEnv.asyncOnGenerate();
  }
}
-async function getTokenizer(url) {
-  const mod = await import("./dist/sentencepiece/index.js");
-  return await mod.sentencePieceProcessor(url);
-}
-tvmjsGlobalEnv.sentencePieceProcessor = getTokenizer;
</script>

+<!--
<form>
<select id="model-name">
<option selected="selected">vicuna-v1-7b-q4f32_0</option>
<!-- <option >dolly-v1-3b-q4f32_0</option> -->
+<option >RedPajama-INCITE-Chat-3B-v1-q4f32_0</option>
</select>
-</form>
+</form> -->

+<label>Pick a pre-compiled model or load your own model's mlc-chat-config.json:
+<input list="model-names" name="model" id="model"/></label>
+<datalist id="model-names">
+<option value="vicuna-v1-7b-q4f32_0" ></option>
+<option value="RedPajama-INCITE-Chat-3B-v1-q4f32_0"></option>
+</datalist>

<script src="dist/llm_chat.js"></script>
<link href="dist/llm_chat.css" rel="stylesheet" type="text/css"/>
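The script change above swaps the SentencePiece-only loader for the tokenizers-cpp bindings, which offer two constructors: Tokenizer.fromJSON for a HuggingFace-style tokenizer.json (the format RedPajama's GPT-NeoX tokenizer ships in) and Tokenizer.fromSentencePiece for a raw SentencePiece model (as used by Vicuna). A minimal sketch of how a caller might pick between the two hooks registered on tvmjsGlobalEnv; the loadTokenizer helper and its tokenizerUrl argument are illustrative assumptions, not part of this commit:

// Sketch only: choosing a tokenizer constructor by file type.
// loadTokenizer() and tokenizerUrl are hypothetical; the commit itself
// only registers the two tvmjsGlobalEnv hooks used below.
async function loadTokenizer(tokenizerUrl) {
  const buffer = await (await fetch(tokenizerUrl)).arrayBuffer();
  if (tokenizerUrl.endsWith(".json")) {
    // HuggingFace tokenizer.json, e.g. RedPajama's GPT-NeoX tokenizer
    return await tvmjsGlobalEnv.tokenizerFromJSON(buffer);
  }
  // Otherwise assume a raw SentencePiece model, e.g. Vicuna's tokenizer.model
  return await tvmjsGlobalEnv.tokenizerFromSentencePiece(buffer);
}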
6 changes: 4 additions & 2 deletions web/global_config.json
@@ -1,8 +1,10 @@
{
  "url_dict":{
-    "vicuna-v1-7b-q4f32_0": "https://huggingface.co/hongyij/web-llm-test-model/resolve/main/model_config.json"
+    "vicuna-v1-7b-q4f32_0": "https://huggingface.co/hongyij/mlc-llm-vicuna-7b-v1/resolve/new_config/model_config.json",
+    "RedPajama-INCITE-Chat-3B-v1-q4f32_0": "https://huggingface.co/hongyij/mlc-llm-redpajama/resolve/main/mlc-llm-config.json"
  },
  "model_lib_map":{
-    "vicuna-v1-7b-q4f32_0": "https://raw.githubusercontent.com/jinhongyii/llm-lib-debug/main/vicuna-v1-7b-q4f32_0-webgpu.wasm"
+    "vicuna-v1-7b-q4f32_0": "https://raw.githubusercontent.com/jinhongyii/llm-lib-debug/new_config/vicuna-v1-7b-q4f32_0-webgpu.wasm",
+    "RedPajama-INCITE-Chat-3B-v1-q4f32_0": "https://raw.githubusercontent.com/jinhongyii/llm-lib-debug/new_config/RedPajama-INCITE-Chat-3B-v1-q4f32_0-webgpu.wasm"
  }
}
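With these entries, global_config.json maps each model id to both its chat config and its compiled WebGPU library. A hedged sketch of the lookup a loader might perform when a model id is chosen from the datalist; resolveModel is a hypothetical helper, not code from this commit:

// Illustrative lookup against web/global_config.json; resolveModel is a
// hypothetical helper, not part of this commit.
async function resolveModel(globalConfig, modelId) {
  const configUrl = globalConfig.url_dict[modelId];    // model/chat config JSON
  const wasmUrl = globalConfig.model_lib_map[modelId]; // compiled WebGPU kernels
  if (configUrl === undefined || wasmUrl === undefined) {
    throw new Error("unknown model id: " + modelId);
  }
  const config = await (await fetch(configUrl)).json();
  return { config, wasmUrl };
}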
32 changes: 20 additions & 12 deletions web/llm_chat.html
@@ -3,31 +3,39 @@
</script>

<script type="module">
-async function getTokenizer(url) {
-  const mod = await import("./dist/sentencepiece/index.js");
-  return await mod.sentencePieceProcessor(url);
+import { Tokenizer } from './dist/tokenizers-cpp/index.js';
+
+async function tokenizerFromJSON(jsonArrayBuffer) {
+  return await Tokenizer.fromJSON(jsonArrayBuffer);
}
+async function tokenizerFromSentencePiece(modelBuffer) {
+  return await Tokenizer.fromSentencePiece(modelBuffer);
+}
-tvmjsGlobalEnv.sentencePieceProcessor = getTokenizer;
+tvmjsGlobalEnv.tokenizerFromJSON = tokenizerFromJSON;
+tvmjsGlobalEnv.tokenizerFromSentencePiece = tokenizerFromSentencePiece;

</script>
<script>
function handleChatUIInputEnter(event) {
  if (event.keyCode === 13) {
    tvmjsGlobalEnv.asyncOnGenerate();
  }
}
-async function getTokenizer(url) {
-  const mod = await import("./dist/sentencepiece/index.js");
-  return await mod.sentencePieceProcessor(url);
-}
-tvmjsGlobalEnv.sentencePieceProcessor = getTokenizer;
</script>

+<!--
<form>
<select id="model-name">
<option selected="selected">vicuna-v1-7b-q4f32_0</option>
<!-- <option >dolly-v1-3b-q4f32_0</option> -->
+<option >RedPajama-INCITE-Chat-3B-v1-q4f32_0</option>
</select>
-</form>
+</form> -->

+<label>Pick a pre-compiled model or load your own model's mlc-chat-config.json:
+<input list="model-names" name="model" id="model"/></label>
+<datalist id="model-names">
+<option value="vicuna-v1-7b-q4f32_0" ></option>
+<option value="RedPajama-INCITE-Chat-3B-v1-q4f32_0"></option>
+</datalist>

<script src="dist/llm_chat.js"></script>
<link href="dist/llm_chat.css" rel="stylesheet" type="text/css"/>
