Commit 1c2d76e: update_readme
flyinghpluo authored Aug 28, 2023
1 parent 526a2d5 commit 1c2d76e
Showing 2 changed files with 87 additions and 2 deletions.
22 changes: 20 additions & 2 deletions demo/README.md
@@ -1,4 +1,4 @@
## 1. WizardCoder Inference Demo

We provide the inference demo script for **WizardCoder-Family**.

@@ -15,4 +15,22 @@ CUDA_VISIBLE_DEVICES=1,2,3,4 python wizardcoder_demo.py \
--n_gpus 4
```

Note: This script supports `WizardLM/WizardCoder-Python-34B/13B/7B-V1.0`. To run inference with `WizardLM/WizardCoder-15B/3B/1B-V1.0`, change `stop_tokens = ['</s>']` to `stop_tokens = ['<|endoftext|>']` in the script.
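
For reference, the change amounts to a single line in `wizardcoder_demo.py` (a sketch; the surrounding code is omitted):

```python
# WizardCoder-Python-34B/13B/7B-V1.0 (Llama tokenizer) ends generations with </s>:
stop_tokens = ['</s>']

# WizardCoder-15B/3B/1B-V1.0 (StarCoder tokenizer) uses <|endoftext|> instead:
# stop_tokens = ['<|endoftext|>']
```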


## 2. WizardMath Inference Demo

We provide the inference demo script for **WizardMath-Family**.

1. Install the environment according to the instructions of [Llama-X](https://github.com/AetherCortex/Llama-X).
2. Install these packages:
```bash
pip install transformers==4.31.0
pip install vllm==0.1.4
```
3. Enjoy your demo:
```bash
CUDA_VISIBLE_DEVICES=0 python wizardmath_demo.py \
--base_model "xxx/path/to/wizardmath_7b_model" \
--n_gpus 1
```
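
Once launched, the demo serves a Gradio UI on port 8080 by default (see the `port` argument in `wizardmath_demo.py`). If you prefer to query the model programmatically, here is a minimal vLLM sketch that mirrors the CoT prompt format used by the script; the model path and the example question are placeholders:

```python
from vllm import LLM, SamplingParams

# Placeholder path: point this at your local WizardMath checkpoint.
llm = LLM(model="xxx/path/to/wizardmath_7b_model", tensor_parallel_size=1)

# Same Alpaca-style CoT prompt template as wizardmath_demo.py.
prompt = (
    "Below is an instruction that describes a task. "
    "Write a response that appropriately completes the request.\n\n"
    "### Instruction:\nJames buys 5 packs of beef that are 4 pounds each. "
    "What is the total weight?\n\n### Response: Let's think step by step."
)

sampling_params = SamplingParams(temperature=0, top_p=1, max_tokens=2048, stop=['</s>'])
outputs = llm.generate([prompt], sampling_params)
print(outputs[0].outputs[0].text)
```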
67 changes: 67 additions & 0 deletions demo/wizardmath_demo.py
@@ -0,0 +1,67 @@
import fire
import gradio as gr
from vllm import LLM, SamplingParams


def main(
        base_model,
        n_gpus=1,
        title="WizardMath-7B-V1.0",
        port=8080,
        load_8bit: bool = False):  # accepted for CLI compatibility; unused with vLLM
    # Load the model once; vLLM shards it across `n_gpus` via tensor parallelism.
    llm = LLM(model=base_model, tensor_parallel_size=n_gpus)

    def evaluate_vllm(
            instruction,
            use_cot=True,
            temperature=1,
            max_new_tokens=2048):
        # Alpaca-style prompt with a chain-of-thought trigger in the response.
        cot_problem_prompt = (
            "Below is an instruction that describes a task. "
            "Write a response that appropriately completes the request.\n\n"
            "### Instruction:\n{instruction}\n\n### Response: Let's think step by step."
        )
        # Plain Alpaca-style prompt without the chain-of-thought trigger.
        problem_prompt = (
            "Below is an instruction that describes a task. "
            "Write a response that appropriately completes the request.\n\n"
            "### Instruction:\n{instruction}\n\n### Response:"
        )
        if use_cot:
            prompt = cot_problem_prompt.format(instruction=instruction)
        else:
            prompt = problem_prompt.format(instruction=instruction)

        stop_tokens = ['</s>']
        sampling_params = SamplingParams(
            temperature=temperature, top_p=1,
            max_tokens=max_new_tokens, stop=stop_tokens)
        completions = llm.generate([prompt], sampling_params)
        # Only one prompt is submitted, so return the first completion's text.
        return completions[0].outputs[0].text

    gr.Interface(
        fn=evaluate_vllm,
        inputs=[
            gr.components.Textbox(
                lines=3, label="Instruction",
                placeholder="Anything you want to ask WizardMath?"
            ),
            gr.components.Checkbox(value=True, label="Use CoT"),
            gr.components.Slider(minimum=0, maximum=1, value=0, label="Temperature"),
            gr.components.Slider(
                minimum=1, maximum=2048, step=1, value=1024, label="Max tokens"
            ),
        ],
        outputs=[
            gr.components.Textbox(
                lines=30,
                label="Output",
            )
        ],
        title=title,
        description="Empowering Mathematical Reasoning for Large Language Models via Reinforced Evol-Instruct"
    ).queue().launch(share=False, server_port=port)


if __name__ == "__main__":
    fire.Fire(main)
