Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 92c98f9

Browse files
antmarakis
authored and norvig committed
NLP: CYK Parse (aimacode#601)
* Update nlp.py * add CYK parsing test
1 parent 14c3f77 commit 92c98f9

File tree

2 files changed

+68
-8
lines changed

2 files changed

+68
-8
lines changed

nlp.py

Lines changed: 60 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ def __init__(self, name, rules, lexicon):
116116
self.rules = rules
117117
self.lexicon = lexicon
118118
self.categories = defaultdict(list)
119+
119120
for lhs in lexicon:
120121
for word, prob in lexicon[lhs]:
121122
self.categories[word].append((lhs, prob))
@@ -128,6 +129,16 @@ def isa(self, word, cat):
128129
"""Return True iff word is of category cat"""
129130
return cat in [c for c, _ in self.categories[word]]
130131

132+
def cnf_rules(self):
133+
"""Returns the tuple (X, Y, Z, p) for rules in the form:
134+
X -> Y Z [p]"""
135+
cnf = []
136+
for X, rules in self.rules.items():
137+
for (Y, Z), p in rules:
138+
cnf.append((X, Y, Z, p))
139+
140+
return cnf
141+
131142
def generate_random(self, S='S'):
132143
"""Replace each token in S by a random entry in grammar (recursively).
133144
Returns a tuple of (sentence, probability)."""
@@ -189,11 +200,48 @@ def __repr__(self):
189200
V='saw | liked | feel'
190201
))
191202

192-
E_NP_ = Grammar('E_NP_', # another trivial grammar for testing
203+
E_NP_ = Grammar('E_NP_', # Another Trivial Grammar for testing
193204
Rules(NP='Adj NP | N'),
194205
Lexicon(Adj='happy | handsome | hairy',
195206
N='man'))
196207

208+
E_Prob = ProbGrammar('E_Prob', # The Probabilistic Grammar from the notebook
209+
ProbRules(
210+
S="NP VP [0.6] | S Conjuction S [0.4]",
211+
NP="Pronoun [0.2] | Name [0.05] | Noun [0.2] | Article Noun [0.15] \
212+
| Article Adjs Noun [0.1] | Digit [0.05] | NP PP [0.15] | NP RelClause [0.1]",
213+
VP="Verb [0.3] | VP NP [0.2] | VP Adjective [0.25] | VP PP [0.15] | VP Adverb [0.1]",
214+
Adjs="Adjective [0.5] | Adjective Adjs [0.5]",
215+
PP="Preposition NP [1]",
216+
RelClause="RelPro VP [1]"
217+
),
218+
ProbLexicon(
219+
Verb="is [0.5] | say [0.3] | are [0.2]",
220+
Noun="robot [0.4] | sheep [0.4] | fence [0.2]",
221+
Adjective="good [0.5] | new [0.2] | sad [0.3]",
222+
Adverb="here [0.6] | lightly [0.1] | now [0.3]",
223+
Pronoun="me [0.3] | you [0.4] | he [0.3]",
224+
RelPro="that [0.5] | who [0.3] | which [0.2]",
225+
Name="john [0.4] | mary [0.4] | peter [0.2]",
226+
Article="the [0.5] | a [0.25] | an [0.25]",
227+
Preposition="to [0.4] | in [0.3] | at [0.3]",
228+
Conjuction="and [0.5] | or [0.2] | but [0.3]",
229+
Digit="0 [0.35] | 1 [0.35] | 2 [0.3]"
230+
))
231+
232+
E_Prob_Chomsky = ProbGrammar('E_Prob_Chomsky', # A Probabilistic Grammar in CNF
233+
ProbRules(
234+
S='NP VP [1]',
235+
NP='Article Noun [0.6] | Adjective Noun [0.4]',
236+
VP='Verb NP [0.5] | Verb Adjective [0.5]',
237+
),
238+
ProbLexicon(
239+
Article='the [0.5] | a [0.25] | an [0.25]',
240+
Noun='robot [0.4] | sheep [0.4] | fence [0.2]',
241+
Adjective='good [0.5] | new [0.2] | sad [0.3]',
242+
Verb='is [0.5] | say [0.3] | are [0.2]'
243+
))
244+
197245

198246
# ______________________________________________________________________________
199247
# Chart Parsing
@@ -236,7 +284,7 @@ def parse(self, words, S='S'):
236284
return self.chart
237285

238286
def add_edge(self, edge):
239-
"Add edge to chart, and see if it extends or predicts another edge."
287+
"""Add edge to chart, and see if it extends or predicts another edge."""
240288
start, end, lhs, found, expects = edge
241289
if edge not in self.chart[end]:
242290
self.chart[end].append(edge)
@@ -248,21 +296,21 @@ def add_edge(self, edge):
248296
self.predictor(edge)
249297

250298
def scanner(self, j, word):
251-
"For each edge expecting a word of this category here, extend the edge."
299+
"""For each edge expecting a word of this category here, extend the edge."""
252300
for (i, j, A, alpha, Bb) in self.chart[j]:
253301
if Bb and self.grammar.isa(word, Bb[0]):
254302
self.add_edge([i, j+1, A, alpha + [(Bb[0], word)], Bb[1:]])
255303

256304
def predictor(self, edge):
257-
"Add to chart any rules for B that could help extend this edge."
305+
"""Add to chart any rules for B that could help extend this edge."""
258306
(i, j, A, alpha, Bb) = edge
259307
B = Bb[0]
260308
if B in self.grammar.rules:
261309
for rhs in self.grammar.rewrites_for(B):
262310
self.add_edge([j, j, B, [], rhs])
263311

264312
def extender(self, edge):
265-
"See what edges can be extended by this edge."
313+
"""See what edges can be extended by this edge."""
266314
(j, k, B, _, _) = edge
267315
for (i, j, A, alpha, B1b) in self.chart[j]:
268316
if B1b and B == B1b[0]:
@@ -273,23 +321,26 @@ def extender(self, edge):
273321
# CYK Parsing
274322

275323
def CYK_parse(words, grammar):
276-
"[Figure 23.5]"
324+
""" [Figure 23.5] """
277325
# We use 0-based indexing instead of the book's 1-based.
278326
N = len(words)
279327
P = defaultdict(float)
328+
280329
# Insert lexical rules for each word.
281330
for (i, word) in enumerate(words):
282-
for (X, p) in grammar.categories[word]: # XXX grammar.categories needs changing, above
331+
for (X, p) in grammar.categories[word]:
283332
P[X, i, 1] = p
333+
284334
# Combine first and second parts of right-hand sides of rules,
285335
# from short to long.
286336
for length in range(2, N+1):
287337
for start in range(N-length+1):
288338
for len1 in range(1, length): # N.B. the book incorrectly has N instead of length
289339
len2 = length - len1
290-
for (X, Y, Z, p) in grammar.cnf_rules(): # XXX grammar needs this method
340+
for (X, Y, Z, p) in grammar.cnf_rules():
291341
P[X, start, length] = max(P[X, start, length],
292342
P[Y, start, len1] * P[Z, start+len1, len2] * p)
343+
293344
return P
294345

295346

@@ -395,6 +446,7 @@ def relevant_pages(query):
395446
hit_intersection = hit_intersection.intersection(hit_list)
396447
return {addr: pagesIndex[addr] for addr in hit_intersection}
397448

449+
398450
def normalize(pages):
399451
"""Normalize divides each page's score by the sum of the squares of all
400452
pages' scores (separately for both the authority and hub scores).

tests/test_nlp.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from nlp import expand_pages, relevant_pages, normalize, ConvergenceDetector, getInlinks
66
from nlp import getOutlinks, Page, determineInlinks, HITS
77
from nlp import Rules, Lexicon, Grammar, ProbRules, ProbLexicon, ProbGrammar
8+
from nlp import CYK_parse
89
# Clumsy imports because we want to access certain nlp.py globals explicitly, because
910
# they are accessed by functions within nlp.py
1011

@@ -92,6 +93,13 @@ def test_prob_generation():
9293
assert len(sentence) == 2
9394

9495

96+
def test_CYK_parse():
97+
grammar = nlp.E_Prob_Chomsky
98+
words = ['the', 'robot', 'is', 'good']
99+
P = CYK_parse(words, grammar)
100+
assert len(P) == 52
101+
102+
95103
# ______________________________________________________________________________
96104
# Data Setup
97105

0 commit comments

Comments
 (0)