forked from SeungyounShin/Llama2-Code-Interpreter
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathllama_hf.py
101 lines (83 loc) · 2.57 KB
/
llama_hf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
from typing import Optional
import os, sys
from transformers import LlamaForCausalLM, LlamaTokenizer
import torch
from datetime import datetime
sys.path.append(os.path.dirname(__file__))
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from utils.special_tok_llama2 import (
B_CODE,
E_CODE,
B_RESULT,
E_RESULT,
B_INST,
E_INST,
B_SYS,
E_SYS,
DEFAULT_PAD_TOKEN,
DEFAULT_BOS_TOKEN,
DEFAULT_EOS_TOKEN,
DEFAULT_UNK_TOKEN,
IGNORE_INDEX,
)
def create_peft_config(model):
from peft import (
get_peft_model,
LoraConfig,
TaskType,
prepare_model_for_int8_training,
)
peft_config = LoraConfig(
task_type=TaskType.CAUSAL_LM,
inference_mode=False,
r=8,
lora_alpha=32,
lora_dropout=0.05,
target_modules=["q_proj", "v_proj"],
)
# prepare int-8 model for training
model = prepare_model_for_int8_training(model)
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()
return model, peft_config
def build_model_from_hf_path(
hf_base_model_path: str = "./ckpt/llama-2-13b-chat",
load_peft: Optional[bool] = False,
peft_model_path: Optional[str] = None,
load_in_4bit: bool = True,
):
start_time = datetime.now()
# build tokenizer
tokenizer = LlamaTokenizer.from_pretrained(
hf_base_model_path,
padding_side="right",
use_fast=False,
)
# Handle special tokens
special_tokens_dict = dict()
if tokenizer.pad_token is None:
special_tokens_dict["pad_token"] = DEFAULT_PAD_TOKEN # 32000
if tokenizer.eos_token is None:
special_tokens_dict["eos_token"] = DEFAULT_EOS_TOKEN # 2
if tokenizer.bos_token is None:
special_tokens_dict["bos_token"] = DEFAULT_BOS_TOKEN # 1
if tokenizer.unk_token is None:
special_tokens_dict["unk_token"] = DEFAULT_UNK_TOKEN
tokenizer.add_special_tokens(special_tokens_dict)
tokenizer.add_tokens(
[B_CODE, E_CODE, B_RESULT, E_RESULT, B_INST, E_INST, B_SYS, E_SYS],
special_tokens=True,
)
# build model
model = LlamaForCausalLM.from_pretrained(
hf_base_model_path,
load_in_4bit=load_in_4bit,
device_map="auto",
)
model.resize_token_embeddings(len(tokenizer))
if load_peft and (peft_model_path is not None):
from peft import PeftModel
model = PeftModel.from_pretrained(model, peft_model_path)
end_time = datetime.now()
elapsed_time = end_time - start_time
return {"tokenizer": tokenizer, "model": model}