diff --git a/probability.py b/probability.py
index 458273b92..c907e348d 100644
--- a/probability.py
+++ b/probability.py
@@ -13,19 +13,23 @@
 from collections import defaultdict
 from functools import reduce
 
+
 # ______________________________________________________________________________
 
 
 def DTAgentProgram(belief_state):
     """A decision-theoretic agent. [Figure 13.1]"""
+
     def program(percept):
         belief_state.observe(program.action, percept)
         program.action = argmax(belief_state.actions(),
                                 key=belief_state.expected_outcome_utility)
         return program.action
+
     program.action = None
     return program
 
+
 # ______________________________________________________________________________
 
 
@@ -132,6 +136,7 @@ def event_values(event, variables):
     else:
         return tuple([event[var] for var in variables])
 
+
 # ______________________________________________________________________________
 
 
@@ -160,6 +165,7 @@ def enumerate_joint(variables, e, P):
     return sum([enumerate_joint(rest, extend(e, Y, y), P)
                 for y in P.values(Y)])
 
+
 # ______________________________________________________________________________
 
 
@@ -378,6 +384,7 @@ def __repr__(self):
     ('MaryCalls', 'Alarm', {T: 0.70, F: 0.01})
 ])
 
+
 # ______________________________________________________________________________
 
 
@@ -409,6 +416,7 @@ def enumerate_all(variables, e, bn):
         return sum(Ynode.p(y, e) * enumerate_all(rest, extend(e, Y, y), bn)
                    for y in bn.variable_values(Y))
 
+
 # ______________________________________________________________________________
 
 
@@ -498,6 +506,7 @@ def all_events(variables, bn, e):
             for x in bn.variable_values(X):
                 yield extend(e1, X, x)
 
+
 # ______________________________________________________________________________
 
 # [Figure 14.12a]: sprinkler network
@@ -510,6 +519,7 @@ def all_events(variables, bn, e):
     ('WetGrass', 'Sprinkler Rain',
      {(T, T): 0.99, (T, F): 0.90,
      (F, T): 0.90, (F, F): 0.00})])
 
+
 # ______________________________________________________________________________
 
@@ -521,6 +531,7 @@ def prior_sample(bn):
         event[node.variable] = node.sample(event)
     return event
 
+
 # _________________________________________________________________
 
 
@@ -547,6 +558,7 @@ def consistent_with(event, evidence):
     return all(evidence.get(k, v) == v
               for k, v in event.items())
 
+
 # _________________________________________________________________
 
 
@@ -579,6 +591,7 @@ def weighted_sample(bn, e):
            event[Xi] = node.sample(event)
     return event, w
 
+
 # _________________________________________________________________
 
 
@@ -612,6 +625,7 @@ def markov_blanket_sample(X, e, bn):
     # (assuming a Boolean variable here)
     return probability(Q.normalize()[True])
 
+
 # _________________________________________________________________
 
 
@@ -655,7 +669,7 @@ def forward_backward(HMM, ev, prior):
 
     fv = [[0.0, 0.0] for _ in range(len(ev))]
     b = [1.0, 1.0]
-    bv = [b] # we don't need bv; but we will have a list of all backward messages here
+    bv = [b]  # we don't need bv; but we will have a list of all backward messages here
     sv = [[0, 0] for _ in range(len(ev))]
 
     fv[0] = prior
@@ -671,6 +685,33 @@ def forward_backward(HMM, ev, prior):
 
     return sv
 
+
+def viterbi(HMM, ev, prior):
+    """[Figure 15.5]
+    Viterbi algorithm to find the most likely sequence. Computes the best path,
+    given an HMM model and a sequence of observations."""
+    t = len(ev)
+    ev.insert(0, None)
+
+    m = [[0.0, 0.0] for _ in range(len(ev) - 1)]
+
+    # the recursion is initialized with m1 = forward(P(X0), e1)
+    m[0] = forward(HMM, prior, ev[1])
+
+    for i in range(1, t):
+        m[i] = element_wise_product(HMM.sensor_dist(ev[i + 1]),
+                                    [max(element_wise_product(HMM.transition_model[0], m[i - 1])),
+                                     max(element_wise_product(HMM.transition_model[1], m[i - 1]))])
+
+    path = [0.0] * (len(ev) - 1)
+    # the construction of the most likely sequence starts in the final state with the largest probability,
+    # and runs backwards; here we record the probability of the most likely state at each step
+    for i in range(t, 0, -1):
+        path[i - 1] = max(m[i - 1])
+
+    return path
+
+
 # _________________________________________________________________
 
 
@@ -702,6 +743,7 @@ def fixed_lag_smoothing(e_t, HMM, d, ev, t):
     else:
         return None
 
+
 # _________________________________________________________________
 
 
@@ -742,13 +784,15 @@ def particle_filtering(e, N, HMM):
     return s
 
+
 # _________________________________________________________________
 
-## TODO: Implement continuous map for MonteCarlo similar to Fig25.10 from the book
+# TODO: Implement continuous map for MonteCarlo similar to Fig25.10 from the book
 
 
 class MCLmap:
     """Map which provides probability distributions and sensor readings.
     Consists of discrete cells which are either an obstacle or empty"""
+
     def __init__(self, m):
         self.m = m
         self.nrows = len(m)
@@ -772,7 +816,7 @@ def ray_cast(self, sensor_num, kin_state):
         # 0
         # 3R1
         # 2
-        delta = ((sensor_num % 2 == 0)*(sensor_num - 1), (sensor_num % 2 == 1)*(2 - sensor_num))
+        delta = ((sensor_num % 2 == 0) * (sensor_num - 1), (sensor_num % 2 == 1) * (2 - sensor_num))
         # sensor direction changes based on orientation
         for _ in range(orient):
             delta = (delta[1], -delta[0])
@@ -790,9 +834,9 @@ def ray_cast(sensor_num, kin_state, m):
         return m.ray_cast(sensor_num, kin_state)
 
     M = len(z)
-    W = [0]*N
-    S_ = [0]*N
-    W_ = [0]*N
+    W = [0] * N
+    S_ = [0] * N
+    W_ = [0] * N
     v = a['v']
     w = a['w']
 
diff --git a/tests/test_probability.py b/tests/test_probability.py
index b4d720937..e4a83ae47 100644
--- a/tests/test_probability.py
+++ b/tests/test_probability.py
@@ -1,4 +1,7 @@
 import random
+
+import pytest
+
 from probability import *
 from utils import rounder
 
@@ -47,7 +50,7 @@ def test_probdist_frequency():
     P = ProbDist('Pascal-5', {'x1': 1, 'x2': 5, 'x3': 10,
                               'x4': 10, 'x5': 5, 'x6': 1})
     assert (P['x1'], P['x2'], P['x3'], P['x4'], P['x5'], P['x6']) == (
-    0.03125, 0.15625, 0.3125, 0.3125, 0.15625, 0.03125)
+        0.03125, 0.15625, 0.3125, 0.3125, 0.15625, 0.03125)
 
 
 def test_probdist_normalize():
@@ -60,7 +63,7 @@ def test_probdist_normalize():
     P['1'], P['2'], P['3'], P['4'], P['5'], P['6'] = 10, 15, 25, 30, 40, 80
     P = P.normalize()
     assert (P.prob['1'], P.prob['2'], P.prob['3'], P.prob['4'], P.prob['5'], P.prob['6']) == (
-    0.05, 0.075, 0.125, 0.15, 0.2, 0.4)
+        0.05, 0.075, 0.125, 0.15, 0.2, 0.4)
 
 
 def test_jointprob():
@@ -106,7 +109,7 @@ def test_enumerate_joint_ask():
     P[0, 1] = 0.5
     P[1, 1] = P[2, 1] = 0.125
     assert enumerate_joint_ask(
-    'X', dict(Y=1), P).show_approx() == '0: 0.667, 1: 0.167, 2: 0.167'
+        'X', dict(Y=1), P).show_approx() == '0: 0.667, 1: 0.167, 2: 0.167'
 
 
 def test_bayesnode_p():
@@ -126,38 +129,38 @@ def test_bayesnode_sample():
 
 def test_enumeration_ask():
     assert enumeration_ask(
-    'Burglary', dict(JohnCalls=T, MaryCalls=T),
-    burglary).show_approx() == 'False: 0.716, True: 0.284'
+        'Burglary', dict(JohnCalls=T, MaryCalls=T),
+        burglary).show_approx() == 'False: 0.716, True: 0.284'
     assert enumeration_ask(
-    'Burglary', dict(JohnCalls=T, MaryCalls=F),
-    burglary).show_approx() == 'False: 0.995, True: 0.00513'
+        'Burglary', dict(JohnCalls=T, MaryCalls=F),
+        burglary).show_approx() == 'False: 0.995, True: 0.00513'
     assert enumeration_ask(
-    'Burglary', dict(JohnCalls=F, MaryCalls=T),
-    burglary).show_approx() == 'False: 0.993, True: 0.00688'
+        'Burglary', dict(JohnCalls=F, MaryCalls=T),
+        burglary).show_approx() == 'False: 0.993, True: 0.00688'
     assert enumeration_ask(
-    'Burglary', dict(JohnCalls=T),
-    burglary).show_approx() == 'False: 0.984, True: 0.0163'
+        'Burglary', dict(JohnCalls=T),
+        burglary).show_approx() == 'False: 0.984, True: 0.0163'
     assert enumeration_ask(
-    'Burglary', dict(MaryCalls=T),
-    burglary).show_approx() == 'False: 0.944, True: 0.0561'
+        'Burglary', dict(MaryCalls=T),
+        burglary).show_approx() == 'False: 0.944, True: 0.0561'
 
 
 def test_elemination_ask():
     assert elimination_ask(
-    'Burglary', dict(JohnCalls=T, MaryCalls=T),
-    burglary).show_approx() == 'False: 0.716, True: 0.284'
+        'Burglary', dict(JohnCalls=T, MaryCalls=T),
+        burglary).show_approx() == 'False: 0.716, True: 0.284'
     assert elimination_ask(
-    'Burglary', dict(JohnCalls=T, MaryCalls=F),
-    burglary).show_approx() == 'False: 0.995, True: 0.00513'
+        'Burglary', dict(JohnCalls=T, MaryCalls=F),
+        burglary).show_approx() == 'False: 0.995, True: 0.00513'
     assert elimination_ask(
-    'Burglary', dict(JohnCalls=F, MaryCalls=T),
-    burglary).show_approx() == 'False: 0.993, True: 0.00688'
+        'Burglary', dict(JohnCalls=F, MaryCalls=T),
+        burglary).show_approx() == 'False: 0.993, True: 0.00688'
     assert elimination_ask(
-    'Burglary', dict(JohnCalls=T),
-    burglary).show_approx() == 'False: 0.984, True: 0.0163'
+        'Burglary', dict(JohnCalls=T),
+        burglary).show_approx() == 'False: 0.984, True: 0.0163'
     assert elimination_ask(
-    'Burglary', dict(MaryCalls=T),
-    burglary).show_approx() == 'False: 0.944, True: 0.0561'
+        'Burglary', dict(MaryCalls=T),
+        burglary).show_approx() == 'False: 0.944, True: 0.0561'
 
 
 def test_prior_sample():
@@ -189,80 +192,80 @@ def test_prior_sample2():
 def test_rejection_sampling():
     random.seed(47)
     assert rejection_sampling(
-    'Burglary', dict(JohnCalls=T, MaryCalls=T),
-    burglary, 10000).show_approx() == 'False: 0.7, True: 0.3'
+        'Burglary', dict(JohnCalls=T, MaryCalls=T),
+        burglary, 10000).show_approx() == 'False: 0.7, True: 0.3'
     assert rejection_sampling(
-    'Burglary', dict(JohnCalls=T, MaryCalls=F),
-    burglary, 10000).show_approx() == 'False: 1, True: 0'
+        'Burglary', dict(JohnCalls=T, MaryCalls=F),
+        burglary, 10000).show_approx() == 'False: 1, True: 0'
     assert rejection_sampling(
-    'Burglary', dict(JohnCalls=F, MaryCalls=T),
-    burglary, 10000).show_approx() == 'False: 0.987, True: 0.0128'
+        'Burglary', dict(JohnCalls=F, MaryCalls=T),
+        burglary, 10000).show_approx() == 'False: 0.987, True: 0.0128'
     assert rejection_sampling(
-    'Burglary', dict(JohnCalls=T),
-    burglary, 10000).show_approx() == 'False: 0.982, True: 0.0183'
+        'Burglary', dict(JohnCalls=T),
+        burglary, 10000).show_approx() == 'False: 0.982, True: 0.0183'
     assert rejection_sampling(
-    'Burglary', dict(MaryCalls=T),
-    burglary, 10000).show_approx() == 'False: 0.965, True: 0.0348'
+        'Burglary', dict(MaryCalls=T),
+        burglary, 10000).show_approx() == 'False: 0.965, True: 0.0348'
 
 
 def test_rejection_sampling2():
     random.seed(42)
     assert rejection_sampling(
-    'Cloudy', dict(Rain=T, Sprinkler=T),
-    sprinkler, 10000).show_approx() == 'False: 0.56, True: 0.44'
+        'Cloudy', dict(Rain=T, Sprinkler=T),
+        sprinkler, 10000).show_approx() == 'False: 0.56, True: 0.44'
     assert rejection_sampling(
-    'Cloudy', dict(Rain=T, Sprinkler=F),
-    sprinkler, 10000).show_approx() == 'False: 0.119, True: 0.881'
+        'Cloudy', dict(Rain=T, Sprinkler=F),
+        sprinkler, 10000).show_approx() == 'False: 0.119, True: 0.881'
     assert rejection_sampling(
-    'Cloudy', dict(Rain=F, Sprinkler=T),
-    sprinkler, 10000).show_approx() == 'False: 0.951, True: 0.049'
+        'Cloudy', dict(Rain=F, Sprinkler=T),
+        sprinkler, 10000).show_approx() == 'False: 0.951, True: 0.049'
     assert rejection_sampling(
-    'Cloudy', dict(Rain=T),
-    sprinkler, 10000).show_approx() == 'False: 0.205, True: 0.795'
+        'Cloudy', dict(Rain=T),
+        sprinkler, 10000).show_approx() == 'False: 0.205, True: 0.795'
     assert rejection_sampling(
-    'Cloudy', dict(Sprinkler=T),
-    sprinkler, 10000).show_approx() == 'False: 0.835, True: 0.165'
+        'Cloudy', dict(Sprinkler=T),
+        sprinkler, 10000).show_approx() == 'False: 0.835, True: 0.165'
 
 
 def test_likelihood_weighting():
     random.seed(1017)
     assert likelihood_weighting(
-    'Burglary', dict(JohnCalls=T, MaryCalls=T),
-    burglary, 10000).show_approx() == 'False: 0.702, True: 0.298'
+        'Burglary', dict(JohnCalls=T, MaryCalls=T),
+        burglary, 10000).show_approx() == 'False: 0.702, True: 0.298'
     assert likelihood_weighting(
-    'Burglary', dict(JohnCalls=T, MaryCalls=F),
-    burglary, 10000).show_approx() == 'False: 0.993, True: 0.00656'
+        'Burglary', dict(JohnCalls=T, MaryCalls=F),
+        burglary, 10000).show_approx() == 'False: 0.993, True: 0.00656'
     assert likelihood_weighting(
-    'Burglary', dict(JohnCalls=F, MaryCalls=T),
-    burglary, 10000).show_approx() == 'False: 0.996, True: 0.00363'
+        'Burglary', dict(JohnCalls=F, MaryCalls=T),
+        burglary, 10000).show_approx() == 'False: 0.996, True: 0.00363'
     assert likelihood_weighting(
-    'Burglary', dict(JohnCalls=F, MaryCalls=F),
-    burglary, 10000).show_approx() == 'False: 1, True: 0.000126'
+        'Burglary', dict(JohnCalls=F, MaryCalls=F),
+        burglary, 10000).show_approx() == 'False: 1, True: 0.000126'
     assert likelihood_weighting(
-    'Burglary', dict(JohnCalls=T),
-    burglary, 10000).show_approx() == 'False: 0.979, True: 0.0205'
+        'Burglary', dict(JohnCalls=T),
+        burglary, 10000).show_approx() == 'False: 0.979, True: 0.0205'
     assert likelihood_weighting(
-    'Burglary', dict(MaryCalls=T),
-    burglary, 10000).show_approx() == 'False: 0.94, True: 0.0601'
+        'Burglary', dict(MaryCalls=T),
+        burglary, 10000).show_approx() == 'False: 0.94, True: 0.0601'
 
 
 def test_likelihood_weighting2():
     random.seed(42)
     assert likelihood_weighting(
-    'Cloudy', dict(Rain=T, Sprinkler=T),
-    sprinkler, 10000).show_approx() == 'False: 0.559, True: 0.441'
+        'Cloudy', dict(Rain=T, Sprinkler=T),
+        sprinkler, 10000).show_approx() == 'False: 0.559, True: 0.441'
     assert likelihood_weighting(
-    'Cloudy', dict(Rain=T, Sprinkler=F),
-    sprinkler, 10000).show_approx() == 'False: 0.12, True: 0.88'
+        'Cloudy', dict(Rain=T, Sprinkler=F),
+        sprinkler, 10000).show_approx() == 'False: 0.12, True: 0.88'
     assert likelihood_weighting(
-    'Cloudy', dict(Rain=F, Sprinkler=T),
-    sprinkler, 10000).show_approx() == 'False: 0.951, True: 0.0486'
+        'Cloudy', dict(Rain=F, Sprinkler=T),
+        sprinkler, 10000).show_approx() == 'False: 0.951, True: 0.0486'
     assert likelihood_weighting(
-    'Cloudy', dict(Rain=T),
-    sprinkler, 10000).show_approx() == 'False: 0.198, True: 0.802'
+        'Cloudy', dict(Rain=T),
+        sprinkler, 10000).show_approx() == 'False: 0.198, True: 0.802'
     assert likelihood_weighting(
-    'Cloudy', dict(Sprinkler=T),
-    sprinkler, 10000).show_approx() == 'False: 0.833, True: 0.167'
+        'Cloudy', dict(Sprinkler=T),
+        sprinkler, 10000).show_approx() == 'False: 0.833, True: 0.167'
 
 
 def test_forward_backward():
@@ -278,8 +281,23 @@ def test_forward_backward():
 
     umbrella_evidence = [T, F, T, F, T]
     assert rounder(forward_backward(umbrellaHMM, umbrella_evidence, umbrella_prior)) == [
-    [0.5871, 0.4129], [0.7177, 0.2823], [0.2324, 0.7676], [0.6072, 0.3928],
-    [0.2324, 0.7676], [0.7177, 0.2823]]
+        [0.5871, 0.4129], [0.7177, 0.2823], [0.2324, 0.7676], [0.6072, 0.3928],
+        [0.2324, 0.7676], [0.7177, 0.2823]]
+
+
+def test_viterbi():
+    umbrella_prior = [0.5, 0.5]
+    umbrella_transition = [[0.7, 0.3], [0.3, 0.7]]
+    umbrella_sensor = [[0.9, 0.2], [0.1, 0.8]]
+    umbrellaHMM = HiddenMarkovModel(umbrella_transition, umbrella_sensor)
+
+    umbrella_evidence = [T, T, F, T, T]
+    assert (rounder(viterbi(umbrellaHMM, umbrella_evidence, umbrella_prior)) ==
+            [0.8182, 0.5155, 0.1237, 0.0334, 0.0210])
+
+    umbrella_evidence = [T, F, T, F, T]
+    assert (rounder(viterbi(umbrellaHMM, umbrella_evidence, umbrella_prior)) ==
+            [0.8182, 0.1964, 0.053, 0.0154, 0.0042])
 
 
 def test_fixed_lag_smoothing():
@@ -318,7 +336,7 @@ def test_particle_filtering():
 
 
 def test_monte_carlo_localization():
-    ## TODO: Add tests for random motion/inaccurate sensors
+    # TODO: Add tests for random motion/inaccurate sensors
     random.seed('aima-python')
     m = MCLmap([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0],
                 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0],
@@ -339,7 +357,7 @@ def P_motion_sample(kin_state, v, w):
         orient = kin_state[2]
 
         # for simplicity the robot first rotates and then moves
-        orient = (orient + w)%4
+        orient = (orient + w) % 4
         for _ in range(orient):
             v = (v[1], -v[0])
         pos = vector_add(pos, v)
@@ -359,7 +377,7 @@ def P_sensor(x, y):
     a = {'v': (0, 0), 'w': 0}
     z = (2, 4, 1, 6)
     S = monte_carlo_localization(a, z, 1000, P_motion_sample, P_sensor, m)
-    grid = [[0]*17 for _ in range(11)]
+    grid = [[0] * 17 for _ in range(11)]
     for x, y, _ in S:
         if 0 <= x < 11 and 0 <= y < 17:
             grid[x][y] += 1
@@ -369,7 +387,7 @@ def P_sensor(x, y):
     a = {'v': (0, 1), 'w': 0}
     z = (2, 3, 5, 7)
     S = monte_carlo_localization(a, z, 1000, P_motion_sample, P_sensor, m, S)
-    grid = [[0]*17 for _ in range(11)]
+    grid = [[0] * 17 for _ in range(11)]
     for x, y, _ in S:
         if 0 <= x < 11 and 0 <= y < 17:
             grid[x][y] += 1