# wizardcoder_demo.py
# Forked from nlpxucan/WizardLM.
import os
import sys
import fire
import torch
import transformers
import gradio as gr
from vllm import LLM, SamplingParams
def main(
    base_model="WizardLM/WizardCoder-Python-34B-V1.0",
    n_gpus=4,
    port=8080,
):
    """Load a model with vLLM and serve it through a Gradio web interface.

    Args:
        base_model: Model name or path handed to ``vllm.LLM`` as ``model``.
        n_gpus: Forwarded to vLLM as ``tensor_parallel_size``.
        port: Forwarded to Gradio's ``launch`` as ``server_port``.
    """
    # The engine is created once and shared by every request via the closure.
    llm = LLM(model=base_model, tensor_parallel_size=n_gpus)

    def evaluate_vllm(
        instruction,
        temperature=1,
        max_new_tokens=2048,
    ):
        """Wrap *instruction* in the prompt template and return the generated text.

        Args:
            instruction: Free-form user request inserted into the template.
            temperature: Sampling temperature passed to ``SamplingParams``.
            max_new_tokens: Upper bound on generated tokens (``max_tokens``).

        Returns:
            The text of the first candidate of the last completion, or an
            empty string if the engine returned no completions.
        """
        problem_prompt = (
            "Below is an instruction that describes a task. "
            "Write a response that appropriately completes the request.\n\n"
            "### Instruction:\n{instruction}\n\n### Response:"
        )
        prompt = problem_prompt.format(instruction=instruction)
        stop_tokens = ['</s>']
        sampling_params = SamplingParams(
            temperature=temperature,
            top_p=1,
            # The UI slider may hand over a float; max_tokens is a token count.
            max_tokens=int(max_new_tokens),
            stop=stop_tokens,
        )
        completions = llm.generate([prompt], sampling_params)

        # Initialised up front so an empty result list yields "" instead of
        # an unbound local.
        generated_text = ""
        for output in completions:
            print('==========================question=============================')
            print(output.prompt)
            generated_text = output.outputs[0].text
            print('===========================answer=============================')
            print(generated_text)
        return generated_text

    demo = gr.Interface(
        fn=evaluate_vllm,
        inputs=[
            gr.components.Textbox(
                lines=3, label="Instruction", placeholder="Anything you want to ask WizardCoder ?"
            ),
            gr.components.Slider(minimum=0, maximum=1, value=0, label="Temperature"),
            gr.components.Slider(
                minimum=1, maximum=2048, step=1, value=1024, label="Max tokens"
            ),
        ],
        outputs=[
            # gr.components (not the deprecated gr.inputs namespace), matching
            # the input widgets above.
            gr.components.Textbox(
                lines=30,
                label="Output",
            )
        ],
        title="WizardCoder",
        description="Empowering Code Large Language Models with Evol-Instruct, github: https://github.com/nlpxucan/WizardLM, huggingface: https://huggingface.co/WizardLM"
    )
    # NOTE(review): share=True asks Gradio for a public share link — confirm
    # that exposing this demo outside the host is intended.
    demo.queue().launch(share=True, server_port=port)
if __name__ == "__main__":
    # Expose main()'s keyword arguments as command-line flags via python-fire.
    fire.Fire(main)