# chat_templates.py
from typing import Any, Dict, List, Optional, Tuple, Union
__all__ = ["DEFAULT_CHAT_TEMPLATE", "DEFAULT_CHAT_CONFIGS", "add_space", "smart_space"]


def add_space(
    msg: str,
    context: str,
    auto_leading_space: bool = True,
    no_space_between: bool = True,
    starts: Optional[List[str]] = None,
    ends: Optional[List[str]] = None
) -> str:
    """Return ``msg``, prefixed with a single space when it needs to be
    separated from ``context``.

    A space is added only if ``auto_leading_space`` is enabled, both strings
    are non-empty, neither the end of ``context`` nor the start of ``msg`` is
    already whitespace, and (when ``no_space_between`` is enabled) the two
    strings do not meet at a known special sequence from ``ends``/``starts``.
    """
    if starts is None or ends is None or no_space_between is False:
        context_ends_special = False
        msg_starts_special = False
    else:
        context_ends_special = any(context.endswith(e) for e in ends if len(e) > 0)
        msg_starts_special = any(msg.startswith(s) for s in starts if len(s) > 0)
    if (auto_leading_space and msg and context) \
            and not (context[-1].isspace() or msg[0].isspace()) \
            and not (context_ends_special and msg_starts_special):
        return ' ' + msg
    return msg
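
# Illustrative examples of add_space (assuming direct calls with the defaults
# unless shown otherwise):
#
#   add_space("world", "Hello")                                    -> " world"
#   add_space("world", "Hello ")                                   -> "world"
#   add_space("[INST]", "</s>", starts=["[INST]"], ends=["</s>"])  -> "[INST]"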


def smart_space(
    parts: List[Tuple[str, bool]],
    auto_leading_space: bool,
    no_space_between: bool,
    seq: Dict[str, Any]
) -> str:
    """Concatenate ``(text, allow_leading_space)`` pairs into one string,
    delegating the spacing decision for each piece to ``add_space``."""
    starts = [seq[role + "_start"] for role in ["system", "user", "assistant"] if (role + "_start") in seq]
    ends = [seq[role + "_end"] for role in ["system", "user", "assistant"] if (role + "_end") in seq]
    if "bos_token" in seq:
        ends.append(seq["bos_token"])
    rendered = ""
    for part in parts:
        if part[0]:
            rendered += add_space(
                part[0],
                rendered,
                auto_leading_space=auto_leading_space and part[1],
                no_space_between=no_space_between,
                starts=starts,
                ends=ends
            )
    return rendered
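
# Illustrative example of smart_space (a sketch with a hand-written chatml-style
# `seq`; the real configs below carry extra keys): because every special
# sequence here already ends in a newline, no extra spaces are inserted.
#
#   smart_space(
#       [("<|im_start|>user\n", False), ("Hello", True), ("<|im_end|>\n", False)],
#       auto_leading_space=True,
#       no_space_between=True,
#       seq={"user_start": "<|im_start|>user\n", "user_end": "<|im_end|>\n"},
#   )
#   -> "<|im_start|>user\nHello<|im_end|>\n"
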
# sources: https://github.com/huggingface/chat-ui/blob/main/PROMPTS.md
DEFAULT_CHAT_TEMPLATE = (
    "{%- set data = namespace(parts=[]) -%}"
    ""
    "{%- if 'all_start' in seq -%}"
    "{%- set data.parts = data.parts + [(seq['all_start'], False)] -%}"
    "{%- endif -%}"
    ""
    "{%- for message in messages -%}"
    "{%- set data.parts = data.parts + [(seq[message['role'] + '_start'], False)] -%}"
    "{%- set data.parts = data.parts + [(message['content'], True)] -%}"
    "{%- set data.parts = data.parts + [(seq[message['role'] + '_end'], False)] -%}"
    "{%- endfor -%}"
    ""
    "{%- if add_gen_prompt -%}"
    "{%- set data.parts = data.parts + [(seq['assistant_start'], False)] -%}"
    "{%- set data.parts = data.parts + [(seq['generation_prompt'], True)] -%}"
    "{%- endif -%}"
    ""
    "{{ data.parts | smart_space(auto_leading_space, no_space_between, seq) }}"
)

# Chat config format:
#
# Each config is either a Jinja2 chat template string with access to the
# following variables:
# - messages: A list of dictionaries with `role` and `content` keys.
# - seq: A dictionary with the start and end sequences of each role.
# - smart_space: A filter that controls the leading space of a string.
# - add_gen_prompt: Whether to append `assistant_start` (and
#   `generation_prompt`) after the entire chat.
#
# or a dictionary with the following keys:
# - all_start: The string to prepend to the entire chat.
# - system_start: The string to prepend to the system message.
# - system_end: The string to append to the system message.
# - user_start: The string to prepend to the user message.
# - user_end: The string to append to the user message.
# - assistant_start: The string to prepend to the assistant message.
# - assistant_end: The string to append to the assistant message.
# - auto_leading_space: Whether to add a leading space when concatenating two
#   strings, if neither the end of the first string nor the start of the
#   second is already whitespace.
# - no_space_between: Whether to skip that leading space between two special
#   tokens.
# - default_stop: A list of strings that indicate the end of a message.
# - merge_system_to_user: Whether to merge the system message into the
#   following user message.
#
DEFAULT_CHAT_CONFIGS: Dict[str, Union[Dict[str, Any], str]] = {
    "base": {
        "system_start": "",
        "system_end": "\n\n",
        "user_start": "",
        "user_end": "",
        "assistant_start": "",
        "assistant_end": "\n\n",
        "auto_leading_space": True,
        "final_rstrip": True,
        "no_space_between": False,
        "default_stop": [],
    },
    "llama2": {
        "system_start": "<<SYS>>\n",
        "system_end": "\n<</SYS>>\n\n",
        "user_start": "<s>[INST] ",
        "user_end": " [/INST] ",
        "assistant_start": "",
        "assistant_end": " </s>",
        "auto_leading_space": True,
        "final_rstrip": False,
        "no_space_between": True,
        "default_stop": [],
    },
    "chatml": {
        "system_start": "<|im_start|>system\n",
        "system_end": "<|im_end|>\n",
        "user_start": "<|im_start|>user\n",
        "user_end": "<|im_end|>\n",
        "assistant_start": "<|im_start|>assistant\n",
        "assistant_end": "<|im_end|>\n",
        "auto_leading_space": True,
        "final_rstrip": False,
        "no_space_between": True,
        "default_stop": ["<|im_end|>"],
    },
    "zephyr": {
        "system_start": "<|system|>\n",
        "system_end": "</s>\n",
        "user_start": "<|user|>\n",
        "user_end": "</s>\n",
        "assistant_start": "<|assistant|>\n",
        "assistant_end": "</s>\n",
        "auto_leading_space": True,
        "final_rstrip": False,
        "no_space_between": True,
        "default_stop": ["</s>"],
    },
    "phi3": {
        "system_start": "<|system|>\n",
        "system_end": "<|end|>\n",
        "user_start": "<|user|>\n",
        "user_end": "<|end|>\n",
        "assistant_start": "<|assistant|>\n",
        "assistant_end": "<|end|>\n",
        "auto_leading_space": True,
        "final_rstrip": False,
        "no_space_between": True,
        "default_stop": ["<|end|>", "<|endoftext|>"],
    },
    "llama3": {
        "system_start": "<|start_header_id|>system<|end_header_id|>\n\n",
        "system_end": "<|eot_id|>",
        "user_start": "<|start_header_id|>user<|end_header_id|>\n\n",
        "user_end": "<|eot_id|>",
        "assistant_start": "<|start_header_id|>assistant<|end_header_id|>\n\n",
        "assistant_end": "<|eot_id|>",
        "auto_leading_space": True,
        "final_rstrip": False,
        "no_space_between": True,
        "default_stop": ["<|eot_id|>"],
    },
    "alpaca": {
        "system_start": "### Input:\n",
        "system_end": "\n\n",
        "user_start": "### Instruction:\n",
        "user_end": "\n\n",
        "assistant_start": "### Response:\n",
        "assistant_end": "\n\n",
        "auto_leading_space": True,
        "final_rstrip": False,
        "no_space_between": False,
        "default_stop": ["###"],
    },
    "gemma": {
        "all_start": "<bos>",
        "merge_system_to_user": True,
        "system_user_sep": "\n",
        "user_start": "<start_of_turn>user\n",
        "user_end": "<end_of_turn>\n",
        "assistant_start": "<start_of_turn>model\n",
        "assistant_end": "<end_of_turn>\n",
        "auto_leading_space": True,
        "final_rstrip": False,
        "no_space_between": True,
        "default_stop": ["<end_of_turn>", "<start_of_turn>"],
    }
}
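

# Minimal usage sketch (an illustration, not part of the original module). It
# assumes jinja2 is installed and that `smart_space` is registered as a Jinja2
# filter; the hosting project presumably wires the template up elsewhere and
# additionally applies keys such as `final_rstrip` and `merge_system_to_user`.
if __name__ == "__main__":
    import jinja2

    env = jinja2.Environment()
    env.filters["smart_space"] = smart_space
    template = env.from_string(DEFAULT_CHAT_TEMPLATE)

    config = DEFAULT_CHAT_CONFIGS["chatml"]
    prompt = template.render(
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "Hello!"},
        ],
        seq=config,
        add_gen_prompt=False,
        auto_leading_space=config["auto_leading_space"],
        no_space_between=config["no_space_between"],
    )
    # Expected output:
    # <|im_start|>system
    # You are a helpful assistant.<|im_end|>
    # <|im_start|>user
    # Hello!<|im_end|>
    print(prompt)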