Commit
add genai example
openvino-dev-samples committed Sep 20, 2024
1 parent 321907d commit 647bc67
Showing 5 changed files with 61 additions and 5 deletions.
6 changes: 6 additions & 0 deletions README.md
@@ -62,6 +62,12 @@ python3 convert.py --model_id qwen/Qwen2-7B-Instruct --precision int4 --output {
python3 chat.py --model_path {your_path}/Qwen2-7B-Instruct-ov --max_sequence_length 4096 --device CPU
```

or

```
python3 chat_genai.py --model_path {your_path}/Qwen2-7B-Instruct-ov --max_sequence_length 4096 --device CPU
```

### Parameters that can be selected

* `--model_path` - The path to the directory where the OpenVINO IR model is located.
6 changes: 6 additions & 0 deletions README_zh.md
@@ -61,6 +61,12 @@ python3 convert.py --model_id qwen/Qwen2-7B-Instruct --precision int4 --output {
python3 chat.py --model_path {your_path}/Qwen2-7B-Instruct-ov --max_sequence_length 4096 --device CPU
```

or

```
python3 chat_genai.py --model_path {your_path}/Qwen2-7B-Instruct-ov --max_sequence_length 4096 --device CPU
```

### Selectable parameters

* `--model_path` - Path to the directory containing the OpenVINO IR model.
1 change: 0 additions & 1 deletion chat.py
@@ -57,7 +57,6 @@ def __call__(
device=args.device,
ov_config=ov_config,
config=AutoConfig.from_pretrained(model_dir),
-trust_remote_code=True,
)

streamer = TextIteratorStreamer(
46 changes: 46 additions & 0 deletions chat_genai.py
@@ -0,0 +1,46 @@
import argparse
import openvino_genai


def streamer(subword):
print(subword, end='', flush=True)
return False

if __name__ == "__main__":
parser = argparse.ArgumentParser(add_help=False)
parser.add_argument('-h',
'--help',
action='help',
help='Show this help message and exit.')
parser.add_argument('-m',
'--model_path',
required=True,
type=str,
help='Required. Path to the OpenVINO IR model directory.')
parser.add_argument('-l',
'--max_sequence_length',
default=256,
required=False,
type=int,
help='Optional. Maximum length of the output (default: 256).')
parser.add_argument('-d',
'--device',
default='CPU',
required=False,
type=str,
help='Optional. Device for inference (default: CPU).')
args = parser.parse_args()
pipe = openvino_genai.LLMPipeline(args.model_path, args.device)

config = openvino_genai.GenerationConfig()
config.max_new_tokens = args.max_sequence_length

pipe.start_chat()
while True:
try:
prompt = input('question:\n')
except EOFError:
break
pipe.generate(prompt, config, streamer)
print('\n----------')
pipe.finish_chat()
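The `streamer` above follows the openvino_genai streaming-callback contract: it is called once per decoded subword and returns `False` to let generation continue (returning `True` stops it early). A minimal sketch of that contract, using a hypothetical `run_generation` driver standing in for `pipe.generate` so it runs without OpenVINO installed:

```python
# Sketch of the streaming-callback contract used in chat_genai.py.
# `run_generation` is a hypothetical stand-in for pipe.generate: it feeds
# decoded subwords to the callback and stops early if the callback returns True.

def run_generation(subwords, streamer):
    emitted = []
    for subword in subwords:
        emitted.append(subword)
        if streamer(subword):  # True means "stop generation early"
            break
    return ''.join(emitted)

collected = []

def collecting_streamer(subword):
    collected.append(subword)
    return False  # keep generating, like the streamer in chat_genai.py

text = run_generation(["Hel", "lo", ", wor", "ld"], collecting_streamer)
# text == "Hello, world"; every subword reached the callback
```

Because the callback sees each fragment as it is produced, the script can print tokens incrementally (`print(subword, end='', flush=True)`) instead of waiting for the full response.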
7 changes: 3 additions & 4 deletions requirements.txt
@@ -1,12 +1,11 @@
 --extra-index-url https://download.pytorch.org/whl/cpu
 numpy
 --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
-openvino
-openvino-tokenizers
+openvino==2024.4.0
+openvino-genai==2024.4.0.0
 nncf>=2.11.0
 optimum-intel>=1.17.0
 transformers>=4.40.0,<4.42.0
-onnx>=1.15.0
 huggingface-hub>=0.21.3
 torch>=2.1
-modelscope
+modelscope
