-
Notifications
You must be signed in to change notification settings - Fork 42
/
Copy pathtest_vocab_csv.py
103 lines (80 loc) · 3.38 KB
/
test_vocab_csv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
"""Unit tests for CSV vocabulary functionality in Annif"""
from annif.corpus import SubjectFileCSV, SubjectIndex
def test_recognize_csv_lowercase():
    """A file name ending in lowercase ``.csv`` is reported as a CSV file."""
    recognized = SubjectFileCSV.is_csv_file("subjects.csv")
    assert recognized
def test_recognize_csv_uppercase():
    """A file name with an uppercase ``.CSV`` extension is also accepted."""
    recognized = SubjectFileCSV.is_csv_file("SUBJECTS.CSV")
    assert recognized
def test_recognize_tsv():
    """A ``.tsv`` file name is not reported as a CSV file."""
    recognized = SubjectFileCSV.is_csv_file("subjects.tsv")
    assert not recognized
def test_recognize_noext():
    """A file name without any extension is not reported as a CSV file."""
    recognized = SubjectFileCSV.is_csv_file("subjects")
    assert not recognized
def test_load_csv_uri_brackets(tmpdir):
    """Load a two-row CSV whose URIs are wrapped in angle brackets.

    The loaded subjects are expected to expose the URIs without the
    surrounding ``<`` and ``>``, the ``"fi"`` label taken from the
    ``label_fi`` column, and ``None`` as notation (the file has no
    ``notation`` column).
    """
    csv_path = tmpdir.join("subjects.csv")
    csv_path.write(
        "uri,label_fi\n"
        "<http://www.yso.fi/onto/yso/p8993>,hylyt\n"
        "<http://www.yso.fi/onto/yso/p9285>,neoliittinen kausi"
    )
    # (uri, "fi" label) expected for each data row, in file order.
    expected = (
        ("http://www.yso.fi/onto/yso/p8993", "hylyt"),
        ("http://www.yso.fi/onto/yso/p9285", "neoliittinen kausi"),
    )

    loaded = list(SubjectFileCSV(str(csv_path)).subjects)

    # Check the count first so that zip() below cannot hide a missing row.
    assert len(loaded) == 2
    for subject, (uri, label) in zip(loaded, expected):
        assert subject.uri == uri
        assert subject.labels["fi"] == label
        assert subject.notation is None
def test_load_tsv_uri_nobrackets(tmpdir):
    """Load a two-row CSV whose URIs are written without angle brackets.

    Despite the ``tsv`` in the test name, the input written here is
    comma-separated and is read through ``SubjectFileCSV``. The URIs are
    expected to come back unchanged, with the ``"fi"`` label from the
    ``label_fi`` column and ``None`` as notation.
    """
    csv_path = tmpdir.join("subjects.csv")
    csv_path.write(
        "uri,label_fi\n"
        "http://www.yso.fi/onto/yso/p8993,hylyt\n"
        "http://www.yso.fi/onto/yso/p9285,neoliittinen kausi"
    )
    # (uri, "fi" label) expected for each data row, in file order.
    expected = (
        ("http://www.yso.fi/onto/yso/p8993", "hylyt"),
        ("http://www.yso.fi/onto/yso/p9285", "neoliittinen kausi"),
    )

    loaded = list(SubjectFileCSV(str(csv_path)).subjects)

    # Check the count first so that zip() below cannot hide a missing row.
    assert len(loaded) == 2
    for subject, (uri, label) in zip(loaded, expected):
        assert subject.uri == uri
        assert subject.labels["fi"] == label
        assert subject.notation is None
def test_load_tsv_with_notations(tmpdir):
    """Load a CSV that carries a third ``notation`` column.

    Despite the ``tsv`` in the test name, the input written here is
    comma-separated. Each loaded subject is expected to expose the
    notation as the exact string found in the file (``"42.42"`` and
    ``"42.0"``), alongside its URI and ``"fi"`` label.
    """
    csv_path = tmpdir.join("subjects-with-notations.csv")
    csv_path.write(
        "uri,label_fi,notation\n"
        "http://www.yso.fi/onto/yso/p8993,hylyt,42.42\n"
        "http://www.yso.fi/onto/yso/p9285,neoliittinen kausi,42.0"
    )
    # (uri, "fi" label, notation) expected for each data row, in file order.
    expected = (
        ("http://www.yso.fi/onto/yso/p8993", "hylyt", "42.42"),
        ("http://www.yso.fi/onto/yso/p9285", "neoliittinen kausi", "42.0"),
    )

    loaded = list(SubjectFileCSV(str(csv_path)).subjects)

    # Check the count first so that zip() below cannot hide a missing row.
    assert len(loaded) == 2
    for subject, (uri, label, notation) in zip(loaded, expected):
        assert subject.uri == uri
        assert subject.labels["fi"] == label
        assert subject.notation == notation
def test_load_tsv_with_deprecated(tmpdir):
    """Check that a row with an empty label is loaded but is not active.

    Despite the ``tsv`` in the test name, the input written here is
    comma-separated. The middle row has a URI but an empty ``label_fi``
    value: it must still appear among the loaded subjects, with
    ``labels`` set to ``None``, while ``SubjectIndex.active`` must list
    only the two labelled subjects.
    """
    tmpfile = tmpdir.join("subjects.csv")
    tmpfile.write(
        "uri,label_fi\n"
        + "<http://www.yso.fi/onto/yso/p8993>,hylyt\n"
        + "<http://example.org/deprecated>,\n"
        + "<http://www.yso.fi/onto/yso/p9285>,neoliittinen kausi"
    )
    corpus = SubjectFileCSV(str(tmpfile))

    # Materialize once and run every corpus-level check against that list.
    subjects = list(corpus.subjects)
    assert len(subjects) == 3
    assert subjects[1].labels is None

    index = SubjectIndex()
    index.load_subjects(corpus)
    active = list(index.active)
    assert len(active) == 2

    # (uri, "fi" label) expected for each active subject, in file order.
    expected = (
        ("http://www.yso.fi/onto/yso/p8993", "hylyt"),
        ("http://www.yso.fi/onto/yso/p9285", "neoliittinen kausi"),
    )
    for entry, (uri, label) in zip(active, expected):
        # The subject object sits at position 1 of each ``active`` entry
        # (presumably (subject id, subject) pairs -- confirm in SubjectIndex).
        subject = entry[1]
        assert subject.uri == uri
        assert subject.labels["fi"] == label
        assert subject.notation is None