Support RedPajama-Chat-3B-V1 (#99)
jinhongyii authored May 18, 2023
1 parent cfd78db commit fb6d0e8
Showing 10 changed files with 201 additions and 120 deletions.
6 changes: 3 additions & 3 deletions .gitmodules
@@ -1,6 +1,6 @@
-[submodule "3rdparty/sentencepiece-js"]
-	path = 3rdparty/sentencepiece-js
-	url = https://github.com/tqchen/sentencepiece-js
[submodule "mlc-llm"]
	path = mlc-llm
	url = https://github.com/mlc-ai/mlc-llm
+[submodule "3rdparty/tokenizers-cpp"]
+	path = 3rdparty/tokenizers-cpp
+	url = https://github.com/mlc-ai/tokenizers-cpp
1 change: 0 additions & 1 deletion 3rdparty/sentencepiece-js
Submodule sentencepiece-js deleted from 9c3168
1 change: 1 addition & 0 deletions 3rdparty/tokenizers-cpp
Submodule tokenizers-cpp added at a3fd73
2 changes: 1 addition & 1 deletion mlc-llm
Submodule mlc-llm updated 51 files
+0 −1 .gitignore
+3 −3 .gitmodules
+0 −1 3rdparty/sentencepiece-js
+1 −0 3rdparty/tokenizers-cpp
+0 −81 3rdparty/tokenizers-cpp/CMakeLists.txt
+0 −13 3rdparty/tokenizers-cpp/Cargo.toml
+0 −187 3rdparty/tokenizers-cpp/src/lib.rs
+0 −101 3rdparty/tokenizers-cpp/tokenizers.h
+13 −27 CMakeLists.txt
+47 −1 README.md
+2 −6 android/MLCChat/app/src/main/java/ai/mlc/mlcchat/LLMChat.java
+8 −1 android/MLCChat/app/src/main/jni/Android.mk
+17 −22 android/README.md
+1 −1 android/prepare_libs.sh
+161 −54 build.py
+30 −0 cpp/README.md
+113 −97 cpp/cli_main.cc
+442 −335 cpp/llm_chat.cc
+6 −4 cpp/llm_chat.h
+5 −3 ios/MLCChat.xcodeproj/project.pbxproj
+7 −14 ios/MLCChat/LLMChat.mm
+143 −48 ios/README.md
+8 −7 ios/prepare_libs.sh
+9 −5 ios/prepare_params.sh
+0 −36 log_db/RedPajama-INCITE-Chat-3B-v1/database_tuning_record.json
+35 −0 log_db/redpajama-3b-q4f16/database_tuning_record.json
+30 −31 log_db/redpajama-3b-q4f16/database_workload.json
+30 −0 log_db/redpajama-3b-q4f32/database_tuning_record.json
+30 −0 log_db/redpajama-3b-q4f32/database_workload.json
+53 −16 mlc_llm/conversation.py
+7 −1 mlc_llm/dispatch/gpt_neox/__init__.py
+2 −2 mlc_llm/dispatch/gpt_neox/dolly_v2_3b.py
+972 −0 mlc_llm/dispatch/gpt_neox/redpajama_incite_chat_3b_v1.py
+722 −0 mlc_llm/dispatch/gpt_neox/redpajama_incite_chat_3b_v1_mod.py
+1,010 −0 mlc_llm/dispatch/gpt_neox/redpajama_incite_chat_3b_v1_tune.py
+840 −0 mlc_llm/dispatch/gpt_neox/redpajama_q4f32.py
+577 −0 mlc_llm/dispatch/gpt_neox/redpajama_q4f32_mod.py
+743 −0 mlc_llm/dispatch/gpt_neox/redpajama_q4f32_tune.py
+23 −0 mlc_llm/relax_model/commons.py
+60 −44 mlc_llm/relax_model/gpt_neox.py
+46 −22 mlc_llm/relax_model/llama.py
+25 −5 mlc_llm/relax_model/modules.py
+103 −84 mlc_llm/relax_model/moss.py
+18 −8 mlc_llm/transform/decode_matmul_ewise.py
+5 −1 mlc_llm/transform/quantization.py
+18 −8 mlc_llm/transform/transpose_matmul.py
+229 −36 mlc_llm/tuning.py
+24 −30 mlc_llm/utils.py
+7 −3 tests/chat.py
+5 −3 tests/debug/compare_lib.py
+3 −2 tests/evaluate.py
2 changes: 1 addition & 1 deletion scripts/build_site.sh
@@ -23,7 +23,7 @@ cp web/llm_chat.css site/dist/

cp dist/tvmjs_runtime.wasi.js site/dist
cp dist/tvmjs.bundle.js site/dist
-cp -r dist/sentencepiece site/dist
+cp -r dist/tokenizers-cpp site/dist

if [ -d "$MLC_LLM_HOME/dist/vicuna-v1-7b-q4f32_0/params" ]; then
  mkdir -p site/dist/vicuna-v1-7b-q4f32_0
8 changes: 4 additions & 4 deletions scripts/prep_deps.sh
@@ -15,15 +15,15 @@ if [[ -z ${TVM_HOME_SET} ]]; then
    export TVM_HOME="${TVM_HOME:-3rdparty/tvm}"
fi

-export SENTENCEPIECE_JS_HOME="3rdparty/sentencepiece-js"
+export TOKENIZERS_CPP_HOME="3rdparty/tokenizers-cpp/web"

mkdir -p dist
cd ${TVM_HOME}/web && make && npm install && npm run bundle && cd -
git submodule update --init --recursive
-cd ${SENTENCEPIECE_JS_HOME} && npm install && npm run build && cd -
+cd ${TOKENIZERS_CPP_HOME} && npm install && npm run build && cd -
git submodule update --init --recursive
-rm -rf dist/sentencepiece
-cp -r ${SENTENCEPIECE_JS_HOME}/dist dist/sentencepiece
+rm -rf dist/tokenizers-cpp
+cp -r ${TOKENIZERS_CPP_HOME}/dist dist/tokenizers-cpp

echo "Exporting tvmjs runtime dist files"
python -c "from tvm.contrib import tvmjs; tvmjs.export_runtime(\"dist\")"
32 changes: 20 additions & 12 deletions site/_includes/llm_chat.html
@@ -3,31 +3,39 @@
</script>

<script type="module">
-async function getTokenizer(url) {
-  const mod = await import("./dist/sentencepiece/index.js");
-  return await mod.sentencePieceProcessor(url);
+import { Tokenizer } from './dist/tokenizers-cpp/index.js';
+
+async function tokenizerFromJSON(jsonArrayBuffer) {
+  return await Tokenizer.fromJSON(jsonArrayBuffer);
}
+async function tokenizerFromSentencePiece(modelBuffer) {
+  return await Tokenizer.fromSentencePiece(modelBuffer);
+}
-tvmjsGlobalEnv.sentencePieceProcessor = getTokenizer;
+tvmjsGlobalEnv.tokenizerFromJSON = tokenizerFromJSON;
+tvmjsGlobalEnv.tokenizerFromSentencePiece = tokenizerFromSentencePiece;

</script>
<script>
function handleChatUIInputEnter(event) {
  if (event.keyCode === 13) {
    tvmjsGlobalEnv.asyncOnGenerate();
  }
}
-async function getTokenizer(url) {
-  const mod = await import("./dist/sentencepiece/index.js");
-  return await mod.sentencePieceProcessor(url);
-}
-tvmjsGlobalEnv.sentencePieceProcessor = getTokenizer;
</script>

+<!--
<form>
<select id="model-name">
<option selected="selected">vicuna-v1-7b-q4f32_0</option>
<!-- <option >dolly-v1-3b-q4f32_0</option> -->
+<option >RedPajama-INCITE-Chat-3B-v1-q4f32_0</option>
</select>
-</form>
+</form> -->

+<label>Pick a pre-compiled model or load your own model's mlc-chat-config.json:
+<input list="model-names" name="model" id="model"/></label>
+<datalist id="model-names">
+<option value="vicuna-v1-7b-q4f32_0" ></option>
+<option value="RedPajama-INCITE-Chat-3B-v1-q4f32_0"></option>
+</datalist>

<script src="dist/llm_chat.js"></script>
<link href="dist/llm_chat.css" rel="stylesheet" type="text/css"/>
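The script change above swaps the SentencePiece-only loader for the tokenizers-cpp bindings, which offer two constructors: Tokenizer.fromJSON for a HuggingFace-style tokenizer.json (the format RedPajama's GPT-NeoX tokenizer ships in) and Tokenizer.fromSentencePiece for a raw SentencePiece model (as used by Vicuna). A minimal sketch of how a caller might pick between the two hooks registered on tvmjsGlobalEnv; the loadTokenizer helper and its tokenizerUrl argument are illustrative assumptions, not part of this commit:

// Sketch only: choosing a tokenizer constructor by file type.
// loadTokenizer() and tokenizerUrl are hypothetical; the commit itself
// only registers the two tvmjsGlobalEnv hooks used below.
async function loadTokenizer(tokenizerUrl) {
  const buffer = await (await fetch(tokenizerUrl)).arrayBuffer();
  if (tokenizerUrl.endsWith(".json")) {
    // HuggingFace tokenizer.json, e.g. RedPajama's GPT-NeoX tokenizer
    return await tvmjsGlobalEnv.tokenizerFromJSON(buffer);
  }
  // Otherwise assume a raw SentencePiece model, e.g. Vicuna's tokenizer.model
  return await tvmjsGlobalEnv.tokenizerFromSentencePiece(buffer);
}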
6 changes: 4 additions & 2 deletions web/global_config.json
@@ -1,8 +1,10 @@
{
  "url_dict":{
-    "vicuna-v1-7b-q4f32_0": "https://huggingface.co/hongyij/web-llm-test-model/resolve/main/model_config.json"
+    "vicuna-v1-7b-q4f32_0": "https://huggingface.co/hongyij/mlc-llm-vicuna-7b-v1/resolve/new_config/model_config.json",
+    "RedPajama-INCITE-Chat-3B-v1-q4f32_0": "https://huggingface.co/hongyij/mlc-llm-redpajama/resolve/main/mlc-llm-config.json"
  },
  "model_lib_map":{
-    "vicuna-v1-7b-q4f32_0": "https://raw.githubusercontent.com/jinhongyii/llm-lib-debug/main/vicuna-v1-7b-q4f32_0-webgpu.wasm"
+    "vicuna-v1-7b-q4f32_0": "https://raw.githubusercontent.com/jinhongyii/llm-lib-debug/new_config/vicuna-v1-7b-q4f32_0-webgpu.wasm",
+    "RedPajama-INCITE-Chat-3B-v1-q4f32_0": "https://raw.githubusercontent.com/jinhongyii/llm-lib-debug/new_config/RedPajama-INCITE-Chat-3B-v1-q4f32_0-webgpu.wasm"
  }
}
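With these entries, global_config.json maps each model id to both its chat config and its compiled WebGPU library. A hedged sketch of the lookup a loader might perform when a model id is chosen from the datalist; resolveModel is a hypothetical helper, not code from this commit:

// Illustrative lookup against web/global_config.json; resolveModel is a
// hypothetical helper, not part of this commit.
async function resolveModel(globalConfig, modelId) {
  const configUrl = globalConfig.url_dict[modelId];    // model/chat config JSON
  const wasmUrl = globalConfig.model_lib_map[modelId]; // compiled WebGPU kernels
  if (configUrl === undefined || wasmUrl === undefined) {
    throw new Error("unknown model id: " + modelId);
  }
  const config = await (await fetch(configUrl)).json();
  return { config, wasmUrl };
}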
32 changes: 20 additions & 12 deletions web/llm_chat.html
@@ -3,31 +3,39 @@
</script>

<script type="module">
-async function getTokenizer(url) {
-  const mod = await import("./dist/sentencepiece/index.js");
-  return await mod.sentencePieceProcessor(url);
+import { Tokenizer } from './dist/tokenizers-cpp/index.js';
+
+async function tokenizerFromJSON(jsonArrayBuffer) {
+  return await Tokenizer.fromJSON(jsonArrayBuffer);
}
+async function tokenizerFromSentencePiece(modelBuffer) {
+  return await Tokenizer.fromSentencePiece(modelBuffer);
+}
-tvmjsGlobalEnv.sentencePieceProcessor = getTokenizer;
+tvmjsGlobalEnv.tokenizerFromJSON = tokenizerFromJSON;
+tvmjsGlobalEnv.tokenizerFromSentencePiece = tokenizerFromSentencePiece;

</script>
<script>
function handleChatUIInputEnter(event) {
  if (event.keyCode === 13) {
    tvmjsGlobalEnv.asyncOnGenerate();
  }
}
-async function getTokenizer(url) {
-  const mod = await import("./dist/sentencepiece/index.js");
-  return await mod.sentencePieceProcessor(url);
-}
-tvmjsGlobalEnv.sentencePieceProcessor = getTokenizer;
</script>

+<!--
<form>
<select id="model-name">
<option selected="selected">vicuna-v1-7b-q4f32_0</option>
<!-- <option >dolly-v1-3b-q4f32_0</option> -->
+<option >RedPajama-INCITE-Chat-3B-v1-q4f32_0</option>
</select>
-</form>
+</form> -->

+<label>Pick a pre-compiled model or load your own model's mlc-chat-config.json:
+<input list="model-names" name="model" id="model"/></label>
+<datalist id="model-names">
+<option value="vicuna-v1-7b-q4f32_0" ></option>
+<option value="RedPajama-INCITE-Chat-3B-v1-q4f32_0"></option>
+</datalist>

<script src="dist/llm_chat.js"></script>
<link href="dist/llm_chat.css" rel="stylesheet" type="text/css"/>
