-
Notifications
You must be signed in to change notification settings - Fork 18
/
Copy pathpipeline.py
77 lines (65 loc) · 2.32 KB
/
pipeline.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# -*- coding: UTF-8 -*-
import numpy as np
from dnn import DNN
from config import TrainMode
from re_cnn import RECNN
from evaluate import estimate_ner
def get_cws(content, model_name):
    """Run the sentence-mode MLP network on ``content``.

    Args:
        content: Input forwarded unchanged to ``DNN.seg``.
        model_name: Checkpoint path, forwarded as ``model_path``.

    Returns:
        The element at index 1 of whatever ``DNN.seg`` returns.
    """
    tagger = DNN('mlp', mode=TrainMode.Sentence, task='ner')
    outputs = tagger.seg(content, model_path=model_name, ner=True, trans=True)
    return outputs[1]
def get_ner(content, model_name):
    """Tag ``content`` with the network that matches the checkpoint path.

    A checkpoint whose path starts with ``'tmp/mlp'`` is loaded into a
    sentence-mode MLP network; any other path is loaded into an LSTM network.

    Args:
        content: Input forwarded unchanged to ``DNN.seg``.
        model_name: Checkpoint path; also selects the network type.

    Returns:
        The element at index 1 of whatever ``DNN.seg`` returns.
    """
    use_mlp = model_name.startswith('tmp/mlp')
    tagger = (
        DNN('mlp', mode=TrainMode.Sentence, task='ner', is_seg=True)
        if use_mlp
        else DNN('lstm', task='ner', is_seg=True)
    )
    return tagger.seg(content, model_path=model_name, ner=True, trans=True)[1]
def get_relation():
    """Build ``RECNN(2)`` and evaluate the ``cnn_emr_model3.ckpt`` checkpoint."""
    checkpoint = 'cnn_emr_model3.ckpt'
    model = RECNN(2)
    # NOTE(review): the same checkpoint is evaluated twice back to back;
    # confirm the repetition is intentional.
    for _ in range(2):
        model.evaluate(checkpoint)
def evaluate_ner(model_name):
    """Score a NER checkpoint on the EMR test set and log the result.

    Loads ``corpus/emr_ner_test_labels.npy`` and
    ``corpus/emr_ner_test_characters.npy``, tags every sample with
    ``get_ner`` and accumulates the three counts returned by
    ``estimate_ner``. Precision, recall and F1 are then printed and appended
    as one tab-separated line (percentages, two decimals) to
    ``corpus/ner.txt``.

    A metric whose denominator is zero is reported as ``0.0`` instead of
    raising ``ZeroDivisionError``. This happens when nothing was predicted,
    nothing was labelled, or nothing was correct.

    Args:
        model_name: Checkpoint path handed to ``get_ner``; also written as the
            first column of the log line.
    """
    base_folder = 'corpus/emr_ner_test_'
    labels = np.load(base_folder + 'labels.npy')
    characters = np.load(base_folder + 'characters.npy')

    corr_count = 0    # numerator shared by precision and recall
    prec_count = 0    # precision denominator
    recall_count = 0  # recall denominator
    for sample, gold in zip(characters, labels):
        c_count, p_count, r_count = estimate_ner(get_ner(sample, model_name), gold)
        corr_count += c_count
        prec_count += p_count
        recall_count += r_count
    # NOTE(review): the totals are printed once after the loop; confirm this
    # matches the intended placement (per-sample vs. final summary).
    print(corr_count, prec_count, recall_count)

    # Guard every division so an empty or degenerate run yields 0.0.
    prec = corr_count / prec_count if prec_count else 0.0
    recall = corr_count / recall_count if recall_count else 0.0
    f1 = 2 * prec * recall / (prec + recall) if prec + recall else 0.0

    with open('corpus/ner.txt', 'a', encoding='utf8') as f:
        f.write(model_name + '\t{:.2f}\t{:.2f}\t{:.2f}\n'.format(prec * 100, recall * 100, f1 * 100))
    print('precision:', prec)
    print('recall:', recall)
    print('F1 score:', f1)
def evaluate_re():
    """Evaluate the relation CNN once per window-size configuration.

    For each configuration a ``RECNN(2, window_size=..., train=False)`` model
    is built and evaluated against the checkpoint named
    ``cnn_emr_model100_<sizes joined by '_'>.ckpt``.
    """
    window_configs = ((2,), (3,), (4,), (2, 3), (3, 4), (2, 3, 4))
    for sizes in window_configs:
        print('window size:', sizes)
        model = RECNN(2, window_size=sizes, train=False)
        # The RECNN(29, ...) variant is currently disabled:
        # re_multi = RECNN(29, window_size=sizes, train=False)
        suffix = '_'.join(str(size) for size in sizes)
        checkpoint = 'cnn_emr_model100_{0}.ckpt'.format(suffix)
        model.evaluate(checkpoint)
        # re_multi.evaluate(checkpoint)
if __name__ == '__main__':
    # Entity recognition (runs currently disabled)
    # print('mlp')
    # evaluate_ner('tmp/mlp/mlp-ner-model20.ckpt')
    # print('mlp+embed')
    # evaluate_ner('tmp/mlp/mlp-ner-embed-model50.ckpt')
    # print('lstm')
    # evaluate_ner('tmp/lstm/lstm-ner-model50.ckpt')
    # print('lstm+embed')
    # evaluate_ner('tmp/lstm/lstm-ner-embed-model50.ckpt')

    # Relation extraction
    evaluate_re()
    # re_two = RECNN(2, window_size=(4,), train=False)
    # re_two.evaluate('cnn_emr_model60_4.ckpt')