Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 2c29a90

Browse files
Chipe1 authored and norvig committed
Fixed mistake in HITS and add test to NLP (aimacode#441)
* Add test for determineInlinks() * Add test for HITS() * fixed premature updation * Refactor code to match pseudocode
1 parent d3155eb commit 2c29a90

File tree

2 files changed

+18
-13
lines changed

2 files changed

+18
-13
lines changed

nlp.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -356,13 +356,13 @@ def detect(self):
356356
def getInlinks(page):
357357
if not page.inlinks:
358358
page.inlinks = determineInlinks(page)
359-
return [p for addr, p in pagesIndex.items() if addr in page.inlinks]
359+
return [addr for addr, p in pagesIndex.items() if addr in page.inlinks]
360360

361361

362362
def getOutlinks(page):
363363
if not page.outlinks:
364364
page.outlinks = findOutlinks(page)
365-
return [p for addr, p in pagesIndex.items() if addr in page.outlinks]
365+
return [addr for addr, p in pagesIndex.items() if addr in page.outlinks]
366366

367367

368368
# ______________________________________________________________________________
@@ -389,9 +389,11 @@ def HITS(query):
389389
p.authority = 1
390390
p.hub = 1
391391
while True: # repeat until... convergence
392-
for p in pages.values():
393-
p.authority = sum(x.hub for x in getInlinks(p)) # p.authority ← ∑i Inlinki(p).Hub
394-
p.hub = sum(x.authority for x in getOutlinks(p)) # p.hub ← ∑i Outlinki(p).Authority
392+
authority = {p: pages[p].authority for p in pages}
393+
hub = {p: pages[p].hub for p in pages}
394+
for p in pages:
395+
pages[p].authority = sum(hub[x] for x in getInlinks(pages[p])) # p.authority ← ∑i Inlinki(p).Hub
396+
pages[p].hub = sum(authority[x] for x in getOutlinks(pages[p])) # p.hub ← ∑i Outlinki(p).Authority
395397
normalize(pages)
396398
if convergence():
397399
break

tests/test_nlp.py

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
from nlp import loadPageHTML, stripRawHTML, findOutlinks, onlyWikipediaURLS
55
from nlp import expand_pages, relevant_pages, normalize, ConvergenceDetector, getInlinks
6-
from nlp import getOutlinks, Page
6+
from nlp import getOutlinks, Page, determineInlinks, HITS
77
from nlp import Rules, Lexicon
88
# Clumsy imports because we want to access certain nlp.py globals explicitly, because
99
# they are accessed by function's within nlp.py
@@ -80,9 +80,9 @@ def test_stripRawHTML(html_mock):
8080

8181

8282
def test_determineInlinks():
83-
# TODO
84-
assert True
85-
83+
assert set(determineInlinks(pA)) == set(['B', 'C', 'E'])
84+
assert set(determineInlinks(pE)) == set([])
85+
assert set(determineInlinks(pF)) == set(['E'])
8686

8787
def test_findOutlinks_wiki():
8888
testPage = pageDict[pA.address]
@@ -141,17 +141,20 @@ def test_detectConvergence():
141141

142142
def test_getInlinks():
143143
inlnks = getInlinks(pageDict['A'])
144-
assert sorted([page.address for page in inlnks]) == pageDict['A'].inlinks
144+
assert sorted(inlnks) == pageDict['A'].inlinks
145145

146146

147147
def test_getOutlinks():
148148
outlnks = getOutlinks(pageDict['A'])
149-
assert sorted([page.address for page in outlnks]) == pageDict['A'].outlinks
149+
assert sorted(outlnks) == pageDict['A'].outlinks
150150

151151

152152
def test_HITS():
153-
# TODO
154-
assert True # leave for now
153+
HITS('inherit')
154+
auth_list = [pA.authority, pB.authority, pC.authority, pD.authority, pE.authority, pF.authority]
155+
hub_list = [pA.hub, pB.hub, pC.hub, pD.hub, pE.hub, pF.hub]
156+
assert max(auth_list) == pD.authority
157+
assert max(hub_list) == pE.hub
155158

156159

157160
if __name__ == '__main__':

0 commit comments

Comments
 (0)