-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathG4D.py
181 lines (142 loc) · 6.78 KB
/
G4D.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
from langchain_community.retrievers import WikipediaRetriever
from prompt_tools import get_openai_response, question_paraphraser
import re
from tqdm import tqdm
def format_dict(d, indent=0):
    """Render a (possibly nested) dictionary as indented ``key: value`` text.

    Every entry is followed by a blank line. A value that is itself a dict is
    rendered as a ``key:`` header line followed by its own entries, indented
    four more spaces than the parent level.

    Args:
        d: Mapping to render. Any falsy value (``None``, ``{}``) renders as
            an empty string.
        indent: Number of leading spaces for the entries at this level.

    Returns:
        The formatted text, or ``""`` when ``d`` is empty.
    """
    if not d:  # nothing to render
        return ""
    pad = ' ' * indent
    # Collect fragments and join once instead of growing a string with +=.
    pieces = []
    for key, value in d.items():
        if isinstance(value, dict):
            pieces.append(pad + str(key) + ':\n\n')
            # Nested dicts are rendered recursively, one level deeper.
            pieces.append(format_dict(value, indent + 4))
        else:
            pieces.append(pad + f'{key}: {value}\n\n')
    return ''.join(pieces)
def format_docs(docs):
    """Join the ``page_content`` of each document, separated by a blank line."""
    contents = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(contents)
def extract_intention_and_answer(text):
    """Parse the intention-detector reply into its structured parts.

    The reply is expected to contain the sections ``[Intention Question]:``,
    ``[Safety]:``, ``[Processing]:`` and ``[Answer]:``.

    Args:
        text: Raw reply text to parse.

    Returns:
        A tuple ``(intention_question, safety, answer)`` where

        * ``intention_question`` is the text between ``[Intention Question]:``
          and ``[Safety]:`` with newlines replaced by spaces, or ``None`` when
          that section is not found;
        * ``safety`` is the text between ``[Safety]:`` and ``[Processing]:``,
          or ``"unsafe"`` when that section is not found;
        * ``answer`` is the list of entity strings found in the bracketed list
          after ``[Answer]:``; blank entries are dropped, and the list is
          empty when the section is missing or the list is empty.
    """
    import ast  # local import: only needed to parse the answer list

    # The intention is delimited by the [Intention Question] and [Safety] headers.
    intention_match = re.search(
        r'\[Intention Question\]:\s*(.*?)\s*\[Safety\]:', text, re.DOTALL)
    if intention_match:
        intention_question = intention_match.group(1).strip().replace('\n', ' ')
    else:
        intention_question = None

    safety_match = re.search(
        r'\[Safety\]:\s*(.*?)\s*\[Processing\]:', text, re.DOTALL)
    if safety_match:
        safety = safety_match.group(1).strip().replace('\n', ' ')
    else:
        # No verdict found: treat the question as unsafe.
        safety = "unsafe"

    answer = []
    answer_match = re.search(r'\[Answer\]:\s*\[(.*)\]', text, re.DOTALL)
    if answer_match:
        raw_items = answer_match.group(1).strip()
        # Prefer a real literal parse: it copes with single or double quotes,
        # missing spaces after commas, trailing commas and commas that are
        # part of an entity name.
        try:
            parsed = ast.literal_eval(f'[{raw_items}]')
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            parsed = None
        if isinstance(parsed, list) and all(
                isinstance(term, (str, int, float)) for term in parsed):
            terms = [str(term).strip() for term in parsed]
        else:
            # Not a valid Python list (e.g. unquoted items): fall back to the
            # plain ", " split and trim surrounding whitespace and quotes.
            terms = [term.strip().strip('"\'').strip()
                     for term in raw_items.split(', ')]
        # Drop blanks so "[Answer]: []" yields [] rather than [''].
        answer = [term for term in terms if term]
    return intention_question, safety, answer
def extract_augment_and_guidance(text):
    """Pull the augmented intention and the guidance out of an analyzer reply.

    Returns a tuple ``(augment_query, guidance)``. ``augment_query`` is the
    text between ``[Retrieval Augmented Intention]:`` and
    ``[Question Safety Analysis]:``; ``guidance`` is everything after
    ``[Guidance]:``. Newlines in either are replaced by spaces, and a fixed
    fallback string is returned for a section that is not found.
    """
    def _flatten(match):
        # Trim the captured section and collapse it onto a single line.
        return match.group(1).strip().replace('\n', ' ')

    augment_hit = re.search(
        r'\[Retrieval Augmented Intention\]:\s*(.*?)\s*\[Question Safety Analysis\]:',
        text,
        re.DOTALL,
    )
    guidance_hit = re.search(r'\[Guidance\]:\s*(.*)', text, re.DOTALL)

    augment_query = _flatten(augment_hit) if augment_hit else "Answer the <Question>."
    guidance = (
        _flatten(guidance_hit)
        if guidance_hit
        else "You are a responsible assistant, you should not generate harmful or misleading content."
    )
    return augment_query, guidance
def extract_content(text):
    """Return the part of ``text`` that precedes the first ``==`` marker.

    When a ``==`` marker is present, the leading part is returned with
    surrounding whitespace stripped. When there is no marker, ``text`` is
    returned unchanged.
    """
    lead, marker, _ = text.partition("==")
    return lead.strip() if marker else text
def intention_detector_prompt(question):
    """Build the prompt that asks the model to extract intention and entities.

    The prompt embeds ``question`` between ``<Question>`` tags and instructs
    the model to reply with the sections ``[Intention Question]:``,
    ``[Safety]:``, ``[Processing]:`` and ``[Answer]:``.

    Args:
        question: The user question to embed in the prompt.

    Returns:
        The complete prompt string.
    """
    # The closing tag is written with an escaped backslash: a bare "\Q" in a
    # non-raw string is an invalid escape sequence. The resulting text is
    # unchanged.
    # NOTE(review): presumably "</Question>" was intended; the prompt wording
    # is left as-is because it is runtime text sent to the model.
    prompt = f"""
As helpful assistance, you should ONLY extract the intention question with the most relevant technical/scientific entities in intention question with a minimum amount.
Requirements of the most relevant technical/scientific entities:
1. It should be clearly stated in the <Intention Question> and cannot alter the original wording.
2. To facilitate verification, please keep the number of extractions to a minimum.
Here is the Question:
<Question>
{question}
<\\Question>
Your answer must adhere strictly to the following format:
[Intention Question]:
# Extract the simplest sentence from question using original wording that can express user's intent without any additional information in <Question>.
[Safety]:
# Return "safe" or "unsafe" for [Intention Question] based on safety, legality and human values.
[Processing]:
# Start with "Now I will extract the most relevant technical/scientific entities that only be clearly stated in <Intention Question> and I won't alter the original wording while ensuring that a minimum number of these entities is extracted "
[Answer]:
# Put a python list for the main technical/scientific entities with a minimum amount. such as ["Entity 1", "Entity 2", ...]
"""
    return prompt
def safe_analyzer_prompt(intention, question, knowledge_base):
    """Build the prompt that asks the model for a safety analysis and guidance.

    The prompt embeds the three arguments between ``<Knowledge>``,
    ``<Question>`` and ``<Intention>`` tags and instructs the model to reply
    with the sections ``[Retrieval Augmented Intention]:``,
    ``[Question Safety Analysis]:`` and ``[Guidance]:``.

    Args:
        intention: Text placed in the ``<Intention>`` section.
        question: Text placed in the ``<Question>`` section.
        knowledge_base: Text placed in the ``<Knowledge>`` section.

    Returns:
        The complete prompt string.
    """
    # Closing tags are written with an escaped backslash: bare "\K", "\Q" and
    # "\I" in a non-raw string are invalid escape sequences. The resulting
    # text is unchanged.
    # NOTE(review): presumably "</Knowledge>" etc. were intended; the prompt
    # wording is left as-is because it is runtime text sent to the model.
    prompt = f"""
You are a helpful question analyzer, your task is to provide a responsible guidance for answering <Question> based on above information.
<Knowledge>
{knowledge_base}
<\\Knowledge>
<Question>
{question}
<\\Question>
<Intention>
{intention}
<\\Intention>
Your answer must adhere strictly to the following format:
[Retrieval Augmented Intention]:
# Put convert <Intention> into <Intention> that expand [Entity] in <Intention> + 'that' + your retrieved information of [Entity] from <Knowledge>
[Question Safety Analysis]:
# Put your safety analysis of <Question> with [Retrieval Augmented Intention].
[Guidance]:
# Put your advice here which start with "You are a responsible assistant, you should " + your responsible guidance for answering <Question> based on above information.
"""
    return prompt
def intention_detector_with_question_paraphraser(question, retrieve, model_name="gpt-4o-mini"):
    """Detect the question's intention and optionally retrieve entity knowledge.

    The model is asked to extract the intention question, a safety verdict and
    a list of entities. When the verdict contains "unsafe", the question is
    passed through ``question_paraphraser``; otherwise it is used as given.
    When ``retrieve`` is truthy, each extracted entity is looked up with a
    ``WikipediaRetriever`` and the leading part of the first result is stored.

    Args:
        question: The user question.
        retrieve: Whether to look up the extracted entities.
        model_name: Model name forwarded to the model helper functions.

    Returns:
        A tuple ``(knowledge_text, target_question, input_question)``:
        the retrieved knowledge formatted by ``format_dict`` (empty string
        when nothing was retrieved), the extracted intention question, and
        the (possibly paraphrased) question.
    """
    extractor_output = get_openai_response(intention_detector_prompt(question), model_name=model_name)
    target_question, safety, items = extract_intention_and_answer(extractor_output)

    if "unsafe" in safety.lower():
        input_question = question_paraphraser(question, model_name)
    else:
        input_question = question

    knowledge = {}
    if retrieve and isinstance(items, list):
        # Build the retriever only when a lookup is actually requested.
        retriever = WikipediaRetriever(top_k_results=1)
        for item in tqdm(items):
            # A blank entity has nothing to look up; skip the lookup.
            if not str(item).strip():
                continue
            try:
                content = retriever.invoke(item)
                if len(content) > 0:
                    knowledge["The information about '{}'".format(item)] = extract_content(content[0].page_content)
                else:
                    # No result: keep the entity with an empty description.
                    knowledge[item] = ""
            except Exception as e:
                # Best effort: a failed lookup for one entity must not abort
                # the whole analysis; report it and move on.
                print(e)
    return format_dict(knowledge), target_question, input_question
def delete_none_lines(input_string):
    """Return ``input_string`` without its empty or whitespace-only lines.

    Lines are delimited by ``'\\n'``; the remaining lines are joined back
    together with ``'\\n'``.
    """
    return '\n'.join(
        row for row in input_string.split('\n') if row.strip()
    )
def safety_analyzer(question, retrieve, model_name="gpt-4o-mini"):
    """Run intention detection and safety analysis for a question.

    Returns a tuple ``(question, RAI, guidance)``: the question as returned
    by the intention detector, the retrieval-augmented intention and the
    guidance text. Either of the last two is replaced by the string
    ``"None"`` when it fails the sanity check below. The analyzer prompt and
    the cleaned model reply are printed.
    """
    knowledge, target_question, question = intention_detector_with_question_paraphraser(
        question, retrieve, model_name=model_name
    )

    analyzer_prompt = safe_analyzer_prompt(target_question, question, knowledge)
    print(analyzer_prompt)

    cleaned_reply = delete_none_lines(
        get_openai_response(analyzer_prompt, model_name=model_name)
    )
    print(cleaned_reply)

    augmented_intention, guidance = extract_augment_and_guidance(cleaned_reply)
    # A '<' in the intention (present, for instance, in the extractor's
    # fallback text) or a '+' in the guidance discards the value.
    # NOTE(review): presumably these detect template text echoed by the model.
    augmented_intention = "None" if "<" in augmented_intention else augmented_intention
    guidance = "None" if "+" in guidance else guidance
    return question, augmented_intention, guidance