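Fixed normalize() to return [] for an empty list instead of raising ZeroDivisionError (this also drops its optional `total` argument). Added AdaBoost, with WeightedMajority left as a stub.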

darius · darius · commit 60a20a4f2e1e · 2011-10-23T22:02:21.000Z
diff --git a/learning.py b/learning.py
@@ -1,7 +1,7 @@
 """Learn to estimate functions from examples. (Chapters 18-20)"""
 
 from utils import *
-import heapq, random
+import heapq, math, random
 
 #______________________________________________________________________________
 
@@ -318,9 +318,7 @@ def split_by(attr, examples):
 def information_content(values):
     "Number of bits to represent the probability distribution in values."
     # If the values do not sum to 1, normalize them to make them a Prob. Dist.
-    values = removeall(0, values)
-    s = float(sum(values))
-    if s != 1.0: values = [v/s for v in values]
+    values = normalize(removeall(0, values))
     return sum([- v * log2(v) for v in values])
 
 #______________________________________________________________________________
@@ -394,6 +392,34 @@ def predict(example):
         return predict
     return train
 
+#______________________________________________________________________________
+
+def AdaBoost(L, K):
+    """[Fig. 18.34] Return a trainer that runs learner L for up to K rounds on reweighted examples."""
+    def train(dataset):
+        examples, target = dataset.examples, dataset.target
+        N = len(examples)
+        w = [1./N] * N
+        h, z = [], []
+        for k in range(K):
+            h_k = L(dataset.examples, w)
+            h.append(h_k)
+            error = sum(weight for example, weight in zip(examples, w)
+                        if example[target] != h_k(example))
+            if error == 0:
+                break
+            assert error < 1, "AdaBoost's sub-learner misclassified everything"
+            for j, example in enumerate(examples):
+                if example[target] == h[k](example):
+                    w[j] *= error / (1. - error)
+            w = normalize(w)
+            z.append(math.log((1. - error) / error))
+        return WeightedMajority(h, z)
+    return train
+
+def WeightedMajority(h, z):
+    raise NotImplementedError
+
 #_____________________________________________________________________________
 # Functions for testing learners on examples
 
diff --git a/utils.py b/utils.py
@@ -518,13 +518,13 @@ def num_or_str(x):
         except ValueError:
             return str(x).strip()
 
-def normalize(numbers, total=1.0):
-    """Multiply each number by a constant such that the sum is 1.0 (or total).
+def normalize(numbers):
+    """Scale the numbers so that they sum to 1.0; an empty list gives [].
     >>> normalize([1,2,1])
     [0.25, 0.5, 0.25]
     """
-    k = total / sum(numbers)
-    return [k * n for n in numbers]
+    total = float(sum(numbers))
+    return [n / total for n in numbers]
 
 ## OK, the following are not as widely useful utilities as some of the other
 ## functions here, but they do show up wherever we have 2D grids: Wumpus and