aimacode · norvig · Aug 16, 2017 · Aug 12, 2017 · Aug 12, 2017 · Aug 13, 2017
diff --git a/learning.py b/learning.py
@@ -306,13 +306,35 @@ def predict(example):
 # ______________________________________________________________________________
 
 
-def NaiveBayesLearner(dataset, continuous=True):
+def NaiveBayesLearner(dataset, continuous=True, simple=False):
+    if simple:  # checked first, so simple=True overrides `continuous`
+        return NaiveBayesSimple(dataset)  # `dataset` is passed on as the distribution dict
     if(continuous):
         return NaiveBayesContinuous(dataset)
     else:
         return NaiveBayesDiscrete(dataset)
 
 
+def NaiveBayesSimple(distribution):
+    """Build a naive Bayes classifier from precomputed per-class distributions.
+    `distribution` maps (class_name, class_prior) tuples to the CountingProbDist
+    of attribute values for that class. Returns predict(example), which picks
+    the class maximizing prior * product of the example's item probabilities."""
+    priors = dict(distribution.keys())
+    likelihoods = {name: dist for (name, _), dist in distribution.items()}
+
+    def predict(example):
+        """Return the class name with the highest score for example, an
+        iterable of attribute values (e.g. a string or a list of characters)."""
+        def score(name):
+            per_class = likelihoods[name]
+            return priors[name] * product(per_class[item] for item in example)
+
+        return argmax(priors, key=score)
+
+    return predict
+
+
 def NaiveBayesDiscrete(dataset):
     """Just count how many times each value of each input attribute
     occurs, conditional on the target value. Count the different

diff --git a/tests/test_learning.py b/tests/test_learning.py
@@ -105,6 +105,20 @@ def test_naive_bayes():
     assert nBC([6, 5, 3, 1.5]) == "versicolor"
     assert nBC([7, 3, 6.5, 2]) == "virginica"
 
+    # Simple
+    data1 = 'a'*50 + 'b'*30 + 'c'*15
+    dist1 = CountingProbDist(data1)
+    data2 = 'a'*30 + 'b'*45 + 'c'*20
+    dist2 = CountingProbDist(data2)
+    data3 = 'a'*20 + 'b'*20 + 'c'*35
+    dist3 = CountingProbDist(data3)
+
+    dist = {('First', 0.5): dist1, ('Second', 0.3): dist2, ('Third', 0.2): dist3}
+    nBS = NaiveBayesLearner(dist, simple=True)
+    assert nBS('aab') == 'First'
+    assert nBS(['b', 'b']) == 'Second'
+    assert nBS('ccbcc') == 'Third'
+
 
 def test_k_nearest_neighbors():
     iris = DataSet(name="iris")