-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcorpus_test.py
65 lines (46 loc) · 1.87 KB
/
corpus_test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import unittest
from src.corpus import Corpus, VectorSpace
from src.corpus import Sequence
from src.poem_engine import NaiveBayes
# Fixture text file handed to Corpus by the tests in this module.
# NOTE(review): the path is relative, so it presumably resolves against the
# working directory the tests are launched from - confirm before relocating.
testcorpus = '../corpus/testcorpus.txt'
class TestStringMethods(unittest.TestCase):
def test_iter(self):
corpus = Corpus(testcorpus, prefix_padding_size=2)
assert([s.augmented() for s in corpus] == [
['~P-2~', '~P-1~', 'and', 'all', 'is', 'all', '~END~'],
['~P-2~', '~P-1~', 'and', 'each', 'is', 'all', '~END~'],
['~P-2~', '~P-1~', 'and', 'infinite', 'the', 'glory', '~END~']
])
assert ([str(s) for s in corpus] == [
str(['and', 'all', 'is', 'all']),
str(['and', 'each', 'is', 'all']),
str(['and', 'infinite', 'the', 'glory'])
])
corpus = Corpus(testcorpus)
assert ([s.augmented() for s in corpus] == [
['and', 'all', 'is', 'all', '~END~'],
['and', 'each', 'is', 'all', '~END~'],
['and', 'infinite', 'the', 'glory', '~END~']
])
def test_get(self):
corpus = Corpus(testcorpus, prefix_padding_size=0)
sequence = corpus[0]
self.assertEquals(sequence.list, ['and', 'all', 'is', 'all'])
self.assertEquals(sequence[2], 'is')
def test_slice(self):
corpus = Corpus(testcorpus, prefix_padding_size=0)
sequence = corpus[0]
self.assertEquals(sequence.list, sequence[0: len(sequence)])
class TestGensim(unittest.TestCase):
def test_dict(self):
vs = VectorSpace(Corpus(testcorpus))
class TestBayes(unittest.TestCase):
def test_p(self):
corpus = Corpus('', 3)
bayes = NaiveBayes(3)
bayes.turn(corpus)
p = bayes.p(('is',), ('all',))
self.assertEquals(1/3.0, p)
self.assertListEqual(['and', 'all', 'is', 'all'], bayes.generate().list)
# Run every TestCase in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()