-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathPolicy.py
More file actions
43 lines (34 loc) · 1.11 KB
/
Copy pathPolicy.py
File metadata and controls
43 lines (34 loc) · 1.11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import numpy as np
from collections import defaultdict
def epsilon_greedy(env, state, q, e=0.95):
    """Return a stochastic policy for ``state`` that favours the greedy action.

    The action with the highest ``q[(state, action)]`` receives probability
    ``e``; the remaining ``1 - e`` is split evenly among all other actions.
    Note that ``e`` is therefore the probability of *exploiting* (the default
    0.95 keeps the greedy action 95% of the time), not of exploring.

    Args:
        env: Object exposing ``A(state)``, which returns the actions available
            in ``state`` (used here via ``len()`` and indexing, so presumably
            a list or tuple).
        state: State the policy is computed for.
        q: Mapping indexed by ``(state, action)`` tuples giving action values.
        e: Probability assigned to the greedy action.

    Returns:
        A ``defaultdict`` mapping each available action to its probability;
        actions that are not available map to 0. Empty when there are no
        available actions.
    """
    pi = defaultdict(lambda: 0)
    actions = env.A(state)
    n_actions = len(actions)
    if n_actions == 0:
        return pi

    # max() keeps the first maximal element, so ties resolve to the earliest
    # action, exactly like a strict ``>`` scan.
    best_action_idx = max(
        range(n_actions), key=lambda i: q[(state, actions[i])]
    )

    if n_actions == 1:
        # There is nothing to explore; the sole action must carry all of the
        # probability mass for the result to be a valid distribution.
        pi[actions[0]] = 1.0
        return pi

    explore_prob = (1 - e) / (n_actions - 1)
    for idx, action in enumerate(actions):
        # Use the caller-supplied ``e`` rather than a hard-coded 0.95 so the
        # probabilities always sum to one.
        pi[action] = e if idx == best_action_idx else explore_prob
    return pi
def greedy(env, state, q):
    """Return the deterministic policy that always picks the best action.

    Args:
        env: Object exposing ``A(state)``, which returns the actions available
            in ``state``.
        state: State the policy is computed for.
        q: Mapping indexed by ``(state, action)`` tuples giving action values.

    Returns:
        A ``defaultdict`` assigning 1 to the highest-valued action (the
        earliest one on ties) and 0 to every other action; unknown actions
        default to 0.
    """
    candidates = env.A(state)
    # First position holding the maximal q-value; 0 when there are no actions.
    top = max(
        range(len(candidates)),
        key=lambda i: q[(state, candidates[i])],
        default=0,
    )
    policy = defaultdict(lambda: 0)
    for position, candidate in enumerate(candidates):
        policy[candidate] = 1 if position == top else 0
    return policy
def random(env, state, q):
    """Return the uniform policy over the actions available in ``state``.

    Args:
        env: Object exposing ``A(state)``, which returns the actions available
            in ``state``.
        state: State the policy is computed for.
        q: Unused by this function.

    Returns:
        A ``defaultdict`` mapping every available action to
        ``1 / len(actions)``; unknown actions default to 0.
    """
    choices = env.A(state)
    uniform = defaultdict(lambda: 0)
    # The generator is consumed lazily, so the division is never evaluated
    # when there are no actions.
    uniform.update((choice, 1 / len(choices)) for choice in choices)
    return uniform