Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 2c29a90

Browse files
Chipe1 authored and norvig committed
Fixed mistake in HITS and add test to NLP (aimacode#441)
* Add test for determineInlinks() * Add test for HITS() * fixed premature updation * Refactor code to match pseudocode
1 parent d3155eb commit 2c29a90

File tree

2 files changed

+18
-13
lines changed

2 files changed

+18
-13
lines changed

nlp.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -356,13 +356,13 @@ def detect(self):
356356
def getInlinks(page):
357357
if not page.inlinks:
358358
page.inlinks = determineInlinks(page)
359-
return [p for addr, p in pagesIndex.items() if addr in page.inlinks]
359+
return [addr for addr, p in pagesIndex.items() if addr in page.inlinks]
360360

361361

362362
def getOutlinks(page):
363363
if not page.outlinks:
364364
page.outlinks = findOutlinks(page)
365-
return [p for addr, p in pagesIndex.items() if addr in page.outlinks]
365+
return [addr for addr, p in pagesIndex.items() if addr in page.outlinks]
366366

367367

368368
# ______________________________________________________________________________
@@ -389,9 +389,11 @@ def HITS(query):
389389
p.authority = 1
390390
p.hub = 1
391391
while True: # repeat until... convergence
392-
for p in pages.values():
393-
p.authority = sum(x.hub for x in getInlinks(p)) # p.authority ← ∑i Inlinki(p).Hub
394-
p.hub = sum(x.authority for x in getOutlinks(p)) # p.hub ← ∑i Outlinki(p).Authority
392+
authority = {p: pages[p].authority for p in pages}
393+
hub = {p: pages[p].hub for p in pages}
394+
for p in pages:
395+
pages[p].authority = sum(hub[x] for x in getInlinks(pages[p])) # p.authority ← ∑i Inlinki(p).Hub
396+
pages[p].hub = sum(authority[x] for x in getOutlinks(pages[p])) # p.hub ← ∑i Outlinki(p).Authority
395397
normalize(pages)
396398
if convergence():
397399
break

tests/test_nlp.py

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
from nlp import loadPageHTML, stripRawHTML, findOutlinks, onlyWikipediaURLS
55
from nlp import expand_pages, relevant_pages, normalize, ConvergenceDetector, getInlinks
6-
from nlp import getOutlinks, Page
6+
from nlp import getOutlinks, Page, determineInlinks, HITS
77
from nlp import Rules, Lexicon
88
# Clumsy imports because we want to access certain nlp.py globals explicitly, because
99
# they are accessed by function's within nlp.py
@@ -80,9 +80,9 @@ def test_stripRawHTML(html_mock):
8080

8181

8282
def test_determineInlinks():
83-
# TODO
84-
assert True
85-
83+
assert set(determineInlinks(pA)) == set(['B', 'C', 'E'])
84+
assert set(determineInlinks(pE)) == set([])
85+
assert set(determineInlinks(pF)) == set(['E'])
8686

8787
def test_findOutlinks_wiki():
8888
testPage = pageDict[pA.address]
@@ -141,17 +141,20 @@ def test_detectConvergence():
141141

142142
def test_getInlinks():
143143
inlnks = getInlinks(pageDict['A'])
144-
assert sorted([page.address for page in inlnks]) == pageDict['A'].inlinks
144+
assert sorted(inlnks) == pageDict['A'].inlinks
145145

146146

147147
def test_getOutlinks():
148148
outlnks = getOutlinks(pageDict['A'])
149-
assert sorted([page.address for page in outlnks]) == pageDict['A'].outlinks
149+
assert sorted(outlnks) == pageDict['A'].outlinks
150150

151151

152152
def test_HITS():
153-
# TODO
154-
assert True # leave for now
153+
HITS('inherit')
154+
auth_list = [pA.authority, pB.authority, pC.authority, pD.authority, pE.authority, pF.authority]
155+
hub_list = [pA.hub, pB.hub, pC.hub, pD.hub, pE.hub, pF.hub]
156+
assert max(auth_list) == pD.authority
157+
assert max(hub_list) == pE.hub
155158

156159

157160
if __name__ == '__main__':

0 commit comments

Comments
 (0)