Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 8e0bfd3

Browse files
antmarakis authored and norvig committed
Updated test_text.py (aimacode#349)
* Rearranged tests:
  - moved test_ngram_models to the top
  - added test_viterbi_segmentation
  - removed test_unigram_text_model
* Renamed "test_ngram_models" to "test_text_models"
1 parent 70f0abd commit 8e0bfd3

File tree

1 file changed

+45
-45
lines changed

1 file changed

+45
-45
lines changed

tests/test_text.py

Lines changed: 45 additions & 45 deletions
Original file line number | Diff line number | Diff line change
@@ -6,13 +6,55 @@
66
from utils import isclose, DataFile
77

88

9-
def test_unigram_text_model():
9+
def test_text_models():
1010
flatland = DataFile("EN-text/flatland.txt").read()
1111
wordseq = words(flatland)
12-
P = UnigramTextModel(wordseq)
12+
P1 = UnigramTextModel(wordseq)
13+
P2 = NgramTextModel(2, wordseq)
14+
P3 = NgramTextModel(3, wordseq)
15+
16+
# The most frequent entries in each model
17+
assert P1.top(10) == [(2081, 'the'), (1479, 'of'), (1021, 'and'),
18+
(1008, 'to'), (850, 'a'), (722, 'i'), (640, 'in'),
19+
(478, 'that'), (399, 'is'), (348, 'you')]
20+
21+
assert P2.top(10) == [(368, ('of', 'the')), (152, ('to', 'the')),
22+
(152, ('in', 'the')), (86, ('of', 'a')),
23+
(80, ('it', 'is')),
24+
(71, ('by', 'the')), (68, ('for', 'the')),
25+
(68, ('and', 'the')), (62, ('on', 'the')),
26+
(60, ('to', 'be'))]
27+
28+
assert P3.top(10) == [(30, ('a', 'straight', 'line')),
29+
(19, ('of', 'three', 'dimensions')),
30+
(16, ('the', 'sense', 'of')),
31+
(13, ('by', 'the', 'sense')),
32+
(13, ('as', 'well', 'as')),
33+
(12, ('of', 'the', 'circles')),
34+
(12, ('of', 'sight', 'recognition')),
35+
(11, ('the', 'number', 'of')),
36+
(11, ('that', 'i', 'had')), (11, ('so', 'as', 'to'))]
1337

14-
s, p = viterbi_segment('itiseasytoreadwordswithoutspaces', P)
38+
assert isclose(P1['the'], 0.0611, rel_tol=0.001)
39+
40+
assert isclose(P2['of', 'the'], 0.0108, rel_tol=0.01)
41+
42+
assert isclose(P3['', '', 'but'], 0.0, rel_tol=0.001)
43+
assert isclose(P3['', '', 'but'], 0.0, rel_tol=0.001)
44+
assert isclose(P3['so', 'as', 'to'], 0.000323, rel_tol=0.001)
45+
46+
assert P2.cond_prob.get(('went',)) is None
47+
48+
assert P3.cond_prob['in', 'order'].dictionary == {'to': 6}
49+
50+
51+
def test_viterbi_segmentation():
52+
flatland = DataFile("EN-text/flatland.txt").read()
53+
wordseq = words(flatland)
54+
P = UnigramTextModel(wordseq)
55+
text = "itiseasytoreadwordswithoutspaces"
1556

57+
s, p = viterbi_segment(text,P)
1658
assert s == [
1759
'it', 'is', 'easy', 'to', 'read', 'words', 'without', 'spaces']
1860

@@ -56,48 +98,6 @@ def test_counting_probability_distribution():
5698
assert 1 / 7 <= min(ps) <= max(ps) <= 1 / 5
5799

58100

59-
def test_ngram_models():
60-
flatland = DataFile("EN-text/flatland.txt").read()
61-
wordseq = words(flatland)
62-
P1 = UnigramTextModel(wordseq)
63-
P2 = NgramTextModel(2, wordseq)
64-
P3 = NgramTextModel(3, wordseq)
65-
66-
# The most frequent entries in each model
67-
assert P1.top(10) == [(2081, 'the'), (1479, 'of'), (1021, 'and'),
68-
(1008, 'to'), (850, 'a'), (722, 'i'), (640, 'in'),
69-
(478, 'that'), (399, 'is'), (348, 'you')]
70-
71-
assert P2.top(10) == [(368, ('of', 'the')), (152, ('to', 'the')),
72-
(152, ('in', 'the')), (86, ('of', 'a')),
73-
(80, ('it', 'is')),
74-
(71, ('by', 'the')), (68, ('for', 'the')),
75-
(68, ('and', 'the')), (62, ('on', 'the')),
76-
(60, ('to', 'be'))]
77-
78-
assert P3.top(10) == [(30, ('a', 'straight', 'line')),
79-
(19, ('of', 'three', 'dimensions')),
80-
(16, ('the', 'sense', 'of')),
81-
(13, ('by', 'the', 'sense')),
82-
(13, ('as', 'well', 'as')),
83-
(12, ('of', 'the', 'circles')),
84-
(12, ('of', 'sight', 'recognition')),
85-
(11, ('the', 'number', 'of')),
86-
(11, ('that', 'i', 'had')), (11, ('so', 'as', 'to'))]
87-
88-
assert isclose(P1['the'], 0.0611, rel_tol=0.001)
89-
90-
assert isclose(P2['of', 'the'], 0.0108, rel_tol=0.01)
91-
92-
assert isclose(P3['', '', 'but'], 0.0, rel_tol=0.001)
93-
assert isclose(P3['', '', 'but'], 0.0, rel_tol=0.001)
94-
assert isclose(P3['so', 'as', 'to'], 0.000323, rel_tol=0.001)
95-
96-
assert P2.cond_prob.get(('went',)) is None
97-
98-
assert P3.cond_prob['in', 'order'].dictionary == {'to': 6}
99-
100-
101101
def test_ir_system():
102102
from collections import namedtuple
103103
Results = namedtuple('IRResults', ['score', 'url'])

0 commit comments

Comments
 (0)