@@ -64,6 +64,22 @@ def display(gridmdp, _height, _width):
64
64
65
65
dialog .mainloop ()
66
66
67
def display_best_policy(_best_policy, _height, _width):
    ''' opens a 'Best Policy' window and shows every policy entry as a bold label '''

    window = tk.Toplevel()
    window.wm_title('Best Policy')

    frame = tk.Frame(window)
    frame.pack(side=tk.TOP, fill=tk.BOTH, expand=True)

    # At least one row and one column are always drawn, even when the
    # reported dimensions are zero or negative.
    for row in range(max(1, _height)):
        for col in range(max(1, _width)):
            cell = ttk.Label(
                frame,
                text=_best_policy[row][col],
                font=('Helvetica', 12, 'bold'),
            )
            # Grid positions are shifted by one relative to the policy indices.
            cell.grid(row=row + 1, column=col + 1, padx=3, pady=3)

    window.mainloop()
82
+
67
83
def initialize_dialogbox (_width , _height , gridmdp , terminals , buttons ):
68
84
''' creates dialogbox for initialization '''
69
85
@@ -98,7 +114,7 @@ def initialize_dialogbox(_width, _height, gridmdp, terminals, buttons):
98
114
99
115
btn_apply = ttk .Button (container , text = 'Apply' , command = partial (initialize_update_table , _width , _height , gridmdp , terminals , buttons , reward , term , wall , label_reward , entry_reward , rbtn_term , rbtn_wall ))
100
116
btn_apply .grid (row = 5 , column = 0 , sticky = 'nsew' , pady = 5 , padx = 5 )
101
- btn_reset = ttk .Button (container , text = 'Reset' , command = partial (initialize_reset_all , _width , _height , gridmdp , terminals , buttons , label_reward , entry_reward , rbtn_wall , rbtn_term ))
117
+ btn_reset = ttk .Button (container , text = 'Reset' , command = partial (initialize_reset_all , _width , _height , gridmdp , terminals , buttons , reward , term , wall , label_reward , entry_reward , rbtn_wall , rbtn_term ))
102
118
btn_reset .grid (row = 5 , column = 1 , sticky = 'nsew' , pady = 5 , padx = 5 )
103
119
btn_ok = ttk .Button (container , text = 'Ok' , command = dialog .destroy )
104
120
btn_ok .grid (row = 5 , column = 2 , sticky = 'nsew' , pady = 5 , padx = 5 )
@@ -146,9 +162,12 @@ def initialize_update_table(_width, _height, gridmdp, terminals, buttons, reward
146
162
for j in range (max (1 , _width )):
147
163
update_table (i , j , gridmdp , terminals , buttons , reward , term , wall , label_reward , entry_reward , rbtn_term , rbtn_wall )
148
164
149
- def reset_all (_height , i , j , gridmdp , terminals , buttons , label_reward , entry_reward , rbtn_wall , rbtn_term ):
165
+ def reset_all (_height , i , j , gridmdp , terminals , buttons , reward , term , wall , label_reward , entry_reward , rbtn_wall , rbtn_term ):
150
166
''' functionality for reset button '''
151
167
168
+ reward .set (0.0 )
169
+ term .set (0 )
170
+ wall .set (0 )
152
171
gridmdp [i ][j ] = 0.0
153
172
buttons [i ][j ].configure (style = 'TButton' )
154
173
buttons [i ][j ].config (text = f'({ _height - i - 1 } , { j } )' )
@@ -163,12 +182,12 @@ def reset_all(_height, i, j, gridmdp, terminals, buttons, label_reward, entry_re
163
182
rbtn_wall .state (['!focus' , '!selected' ])
164
183
rbtn_term .state (['!focus' , '!selected' ])
165
184
166
def initialize_reset_all(
        _width, _height, gridmdp, terminals, buttons,
        reward, term, wall,
        label_reward, entry_reward, rbtn_wall, rbtn_term):
    ''' applies reset_all to each cell of the grid, row by row '''

    # Both ranges are clamped so that at least one cell is always visited.
    for row in range(max(1, _height)):
        for col in range(max(1, _width)):
            reset_all(
                _height, row, col, gridmdp, terminals, buttons,
                reward, term, wall,
                label_reward, entry_reward, rbtn_wall, rbtn_term,
            )
172
191
173
192
def external_reset (_width , _height , gridmdp , terminals , buttons ):
174
193
''' reset from edit menu '''
@@ -263,7 +282,7 @@ def dialogbox(i, j, gridmdp, terminals, buttons, _height):
263
282
264
283
btn_apply = ttk .Button (container , text = 'Apply' , command = partial (update_table , i , j , gridmdp , terminals , buttons , reward , term , wall , label_reward , entry_reward , rbtn_term , rbtn_wall ))
265
284
btn_apply .grid (row = 5 , column = 0 , sticky = 'nsew' , pady = 5 , padx = 5 )
266
- btn_reset = ttk .Button (container , text = 'Reset' , command = partial (reset_all , _height , i , j , gridmdp , terminals , buttons , label_reward , entry_reward , rbtn_wall , rbtn_term ))
285
+ btn_reset = ttk .Button (container , text = 'Reset' , command = partial (reset_all , _height , i , j , gridmdp , terminals , buttons , reward , term , wall , label_reward , entry_reward , rbtn_wall , rbtn_term ))
267
286
btn_reset .grid (row = 5 , column = 1 , sticky = 'nsew' , pady = 5 , padx = 5 )
268
287
btn_ok = ttk .Button (container , text = 'Ok' , command = dialog .destroy )
269
288
btn_ok .grid (row = 5 , column = 2 , sticky = 'nsew' , pady = 5 , padx = 5 )
@@ -595,6 +614,9 @@ def animate_graph(self, i):
595
614
if (self .delta < self .epsilon * (1 - self .gamma ) / self .gamma ) or (self .iterations > 60 ) and self .terminated == False :
596
615
self .terminated = True
597
616
display (self .grid_to_show , self ._height , self ._width )
617
+
618
+ pi = best_policy (self .sequential_decision_environment , value_iteration (self .sequential_decision_environment , .01 ))
619
+ display_best_policy (self .sequential_decision_environment .to_arrows (pi ), self ._height , self ._width )
598
620
599
621
ax = fig .gca ()
600
622
ax .xaxis .set_major_locator (MaxNLocator (integer = True ))
0 commit comments