
Commit b5c20af

Track num iterations for value/policy iteration
1 parent 96c68a7 commit b5c20af

File tree

1 file changed (+5, -0)


mdp.py

Lines changed: 5 additions & 0 deletions
@@ -35,6 +35,7 @@ def __init__(self, init, actlist, terminals, transitions={}, states=None, gamma=
         self.transitions = transitions
         self.gamma = gamma
         self.reward = {}
+        self.iters_ = 0
 
     def R(self, state):
         """Return a numeric reward for this state."""
@@ -121,7 +122,9 @@ def value_iteration(mdp, epsilon=0.001):
     """Solving an MDP by value iteration. [Figure 17.4]"""
     U1 = {s: 0 for s in mdp.states}
     R, T, gamma = mdp.R, mdp.T, mdp.gamma
+    mdp.iters_ = 0
     while True:
+        mdp.iters_ = mdp.iters_ + 1
         U = U1.copy()
         delta = 0
         for s in mdp.states:
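
With this change, value_iteration resets mdp.iters_ and increments it once per sweep of the main loop, so callers can read the count after solving. A minimal usage sketch, assuming the 4x3 grid world sequential_decision_environment that mdp.py defines:

from mdp import value_iteration, sequential_decision_environment

mdp = sequential_decision_environment    # 4x3 grid world from the same module
U = value_iteration(mdp, epsilon=0.001)  # resets mdp.iters_, then counts each sweep
print(mdp.iters_)  # Bellman-update sweeps run before delta fell below the threshold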
@@ -152,7 +155,9 @@ def policy_iteration(mdp):
     """Solve an MDP by policy iteration [Figure 17.7]"""
     U = {s: 0 for s in mdp.states}
     pi = {s: random.choice(mdp.actions(s)) for s in mdp.states}
+    mdp.iters_ = 0
     while True:
+        mdp.iters_ = mdp.iters_ + 1
         U = policy_evaluation(pi, U, mdp)
         unchanged = True
         for s in mdp.states:
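
The same attribute can be read after policy_iteration, where each increment is one evaluate-then-improve round. A sketch under the same assumptions:

from mdp import policy_iteration, sequential_decision_environment

mdp = sequential_decision_environment
pi = policy_iteration(mdp)  # resets mdp.iters_, then counts each round
print(mdp.iters_)           # rounds until the policy stopped changing

Since both solvers write the same attribute, mdp.iters_ reflects whichever solver ran last on a given MDP object; the trailing underscore appears to follow the scikit-learn convention for attributes set by running an algorithm rather than by the constructor.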
