
Commit e6d5fcf

Chipe1 authored and norvig committed
Intersection query for relevant_pages (aimacode#509)
* Modified relevant_pages()
* Additional tests for relevant_pages()
1 parent cd08bec commit e6d5fcf

2 files changed: +18 -12 lines changed

2 files changed

+18
-12
lines changed

nlp.py

Lines changed: 11 additions & 9 deletions
@@ -301,15 +301,17 @@ def expand_pages(pages):
 
 
 def relevant_pages(query):
-    """Relevant pages are pages that contain the query in its entireity.
-    If a page's content contains the query it is returned by the function."""
-    relevant = {}
-    print("pagesContent in function: ", pagesContent)
-    for addr, page in pagesIndex.items():
-        if query.lower() in pagesContent[addr].lower():
-            relevant[addr] = page
-    return relevant
-
+    """Relevant pages are pages that contain all of the query words. They are obtained by
+    intersecting the hit lists of the query words."""
+    hit_intersection = {addr for addr in pagesIndex}
+    query_words = query.split()
+    for query_word in query_words:
+        hit_list = set()
+        for addr in pagesIndex:
+            if query_word.lower() in pagesContent[addr].lower():
+                hit_list.add(addr)
+        hit_intersection = hit_intersection.intersection(hit_list)
+    return {addr: pagesIndex[addr] for addr in hit_intersection}
 
 def normalize(pages):
     """From the pseudocode: Normalize divides each page's score by the sum of

tests/test_nlp.py

Lines changed: 7 additions & 3 deletions
@@ -30,7 +30,7 @@ def test_lexicon():
 href="https://google.com.au"
 < href="/wiki/TestThing" > href="/wiki/TestBoy"
 href="/wiki/TestLiving" href="/wiki/TestMan" >"""
-testHTML2 = "Nothing"
+testHTML2 = "a mom and a dad"
 testHTML3 = """
 <!DOCTYPE html>
 <html>
@@ -106,9 +106,13 @@ def test_expand_pages():
 
 
 def test_relevant_pages():
-    pages = relevant_pages("male")
-    assert all((x in pages.keys()) for x in ['A', 'C', 'E'])
+    pages = relevant_pages("his dad")
+    assert all((x in pages) for x in ['A', 'C', 'E'])
     assert all((x not in pages) for x in ['B', 'D', 'F'])
+    pages = relevant_pages("mom and dad")
+    assert all((x in pages) for x in ['A', 'B', 'C', 'D', 'E', 'F'])
+    pages = relevant_pages("philosophy")
+    assert all((x not in pages) for x in ['A', 'B', 'C', 'D', 'E', 'F'])
 
 
 def test_normalize():
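
The three queries in the updated test cover a multi-word query that only some test pages satisfy ("his dad"), a query whose words the assertions expect in every test page ("mom and dad"), and a word expected in none ("philosophy"). Assuming the suite is run with pytest like the repository's other tests, the new assertions can be checked in isolation with something like:

    python -m pytest tests/test_nlp.py -k relevant_pages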
