Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
shengyumao committed Mar 25, 2024
1 parent 8833b8c commit bc987f1
Showing 5 changed files with 15 additions and 59 deletions.
1 change: 1 addition & 0 deletions easyeditor/dataset/personality.py
Original file line number Diff line number Diff line change
@@ -53,6 +53,7 @@ def __init__(self, data_dir: str, size: typing.Optional[int] = None, config=None
if isinstance(tokenizer, GPT2Tokenizer) or isinstance(tokenizer, GPT2TokenizerFast):
tokenizer.pad_token_id = tokenizer.eos_token_id
tokenizer.padding_side = 'left'
tokenizer.add_special_tokens({'sep_token': '</s>'})
print('GPTTokenizer Detected, Set pad token id and left padding!!!')
elif isinstance(tokenizer, LlamaTokenizer):
tokenizer.pad_token_id = tokenizer.eos_token_id
13 changes: 6 additions & 7 deletions easyeditor/editors/per_editor.py
Original file line number Diff line number Diff line change
@@ -67,13 +67,12 @@ def __init__(self,
self.tok = LlamaTokenizer.from_pretrained(self.model_name)
self.tok.pad_token_id = 0 if self.tok.pad_token_id is None else self.tok.pad_token_id
self.tok.bos_token_id = 1
# self.tok.add_special_tokens({'sep_token': '</s>'})
# self.model.resize_token_embeddings(len(self.tok))
# self.model.lm_head.weight.data[-1, :] = self.model.lm_head.weight.data.mean(0)
# if "gpt" in self.model_name.lower():
# tokenizer.add_special_tokens({'sep_token': '</s>'})
# model.resize_token_embeddings(len(tokenizer))
# model.lm_head.weight.data[-1, :] = model.lm_head.weight.data.mean(0)
if "gpt" in self.model_name.lower():
self.model = AutoModelForCausalLM.from_pretrained(self.model_name, torch_dtype=torch_dtype, device_map=device_map)
self.tok = GPT2Tokenizer.from_pretrained(self.model_name)
self.tok.pad_token_id = self.tok.eos_token_id
self.tok.add_special_tokens({'sep_token': '</s>'})
self.model.resize_token_embeddings(len(self.tok))
else:
raise NotImplementedError

6 changes: 3 additions & 3 deletions easyeditor/evaluate/evaluate.py
Original file line number Diff line number Diff line change
@@ -27,7 +27,7 @@
kl_loc_loss,
es,
es_per_icl,
eval_TPSI,
per_generation,
F1
)

@@ -749,7 +749,7 @@ def compute_per_ike_metric(
}

if test_generation:
result.update(eval_TPSI(
result.update(per_generation(
model=model,
tok=tok,
max_out_len=60,
@@ -798,7 +798,7 @@ def compute_per_metric(
}

if test_generation:
result.update(eval_TPSI(
result.update(per_generation(
model=model,
edited_model=edited_model,
tok=tok,
51 changes: 2 additions & 49 deletions easyeditor/evaluate/evaluate_utils.py
Original file line number Diff line number Diff line change
@@ -399,16 +399,14 @@ def es_per_icl(example, pre_logits, edit_logits):
}


def eval_TPSI(
def per_generation(
model,
tok,
max_out_len: int,
target_per,
device,
edited_model=None,
retry=4,
IKE=False,
TPSI=False,
**kwargs
):

@@ -443,53 +441,8 @@ def clean_text(text):
"edit_text": edit_text,
"ngram_pre_text": n_gram_entropy([pre_text]),
"ngram_edit_text": n_gram_entropy([edit_text]),
"target_per": target_per,
}

def call_gpt4(text, max_attempts=5, backoff_seconds=1.0):
    """Send ``text`` to GPT-4 as a single user message and parse a 1-5 rating.

    The request uses ``temperature=0`` and ``max_tokens=1`` so the reply is
    expected to be a single digit.

    Args:
        text: Fully formatted prompt to send as the user message.
        max_attempts: Maximum number of API calls to make before giving up.
            The previous implementation retried forever, which never
            terminated on permanent failures (bad credentials, exhausted
            quota, missing client library).
        backoff_seconds: Base delay between failed attempts; the delay
            doubles after each failure. Pass ``0`` to retry immediately.

    Returns:
        A ``(score, cost)`` tuple. ``score`` is an int in 1..5, or ``-1`` when
        the reply is not one of "1".."5" or when every attempt failed.
        ``cost`` is the ``total_tokens`` reported by the API, or ``0`` when no
        response was obtained.
    """
    import time  # local import: the file's import block is not visible here

    response = None
    for attempt in range(max_attempts):
        try:
            response = openai.ChatCompletion.create(
                model="gpt-4-0613",
                messages=[
                    {"role": "user", "content": text}
                ],
                temperature=0,
                max_tokens=1
            )
            break
        except Exception as e:
            # Best-effort call: report the failure and retry, as before,
            # but only a bounded number of times.
            print(e)
            if attempt + 1 < max_attempts:
                time.sleep(backoff_seconds * 2 ** attempt)

    if response is None:
        # Same sentinel the callers already treat as "no valid score".
        return -1, 0

    score = response["choices"][0]["message"]["content"]
    cost = response["usage"]["total_tokens"]
    score = int(score) if score in ["1", "2", "3", "4", "5"] else -1

    return score, cost

prompt_dict = {
"extraversion": "Respond to match this description.\nPersona Description: I can be described as a person with one or more adjectives in friendly, extraverted, talkative, bold, assertive, active, energetic, adventurous and daring, cheerful.\nEvaluating the opinion: \"{}\"\nhow accurately the opinion mathes the description, please rate a scale in [1,2,3,4,5] (where 1 = `very inaccurate`, 2 = `moderately inaccurate`, 3 = `neither accurate nor inaccurate`, 4 = `moderately accurate`, and 5 = `very accurate`):",
"agreeableness": "Respond to match this description.\nPersona Description: I can be described as a person with one or more adjectives in trustful, dishonest, honest, altruistic, generous, cooperative, humble, sympathetic, unselfish, agreeable.\nEvaluating the opinion: \"{}\"\nhow accurately the opinion mathes the description, please rate a scale in [1,2,3,4,5] (where 1 = `very inaccurate`, 2 = `moderately inaccurate`, 3 = `neither accurate nor inaccurate`, 4 = `moderately accurate`, and 5 = `very accurate`):",
"neuroticism": "Respond to match this description.\nPersona Description: I can be described as a person with one or more adjectives in tense, nervous, anxious, angry, irritable, depressed, self-conscious, impulsive, discontented, emotionally unstable.\nEvaluating the opinion: \"{}\"\nhow accurately the opinion mathes the description, please rate a scale in [1,2,3,4,5] (where 1 = `very inaccurate`, 2 = `moderately inaccurate`, 3 = `neither accurate nor inaccurate`, 4 = `moderately accurate`, and 5 = `very accurate`):"
}

prompt = prompt_dict[target_per]

if TPSI:
for i in range(retry):
pre_score, _ = call_gpt4(prompt.format(pre_text))
if pre_score != -1: break
pre_score = None

for i in range(retry):
edit_score, _ = call_gpt4(prompt.format(edit_text))
if edit_score != -1: break
edit_score = None

result.update({
"pre": pre_score,
"edit": edit_score,
"TPSI": edit_score-pre_score
})

return result

3 changes: 3 additions & 0 deletions easyeditor/trainer/PerTrainer.py
Original file line number Diff line number Diff line change
@@ -33,6 +33,9 @@ def __init__(self, config, train_set: Dataset, val_set: Dataset):
self.lr_opt.load_state_dict(self.archive["lr_opt"])
else:
self.lr_opt = None

if 'gpt' in self.config.model_class.lower():
self.model.model.resize_token_embeddings(len(val_set.tok))

def edit_step(self, batch, training: bool):
self.model.train(training)

0 comments on commit bc987f1

Please sign in to comment.