Thanks for visiting codestin.com
Credit goes to www.scribd.com

0% found this document useful (0 votes)
19 views4 pages

Q4.ipynb - Colab

Uploaded by

Rahul me20b145
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
19 views4 pages

Q4.ipynb - Colab

Uploaded by

Rahul me20b145
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 4

5/12/24, 8:37 AM Q4.ipynb - Colab

1 import numpy as np
2 import time

class Agent:
    """Tabular temporal-difference batting agent (SARSA or Q-learning).

    The state is ``(balls, wickets)``: how many balls have been bowled and
    how many wickets have been lost so far in the current innings.  There are
    six actions, numbered 0-5.  Every ball is rewarded with
    ``runs_scored - 0.5``.

    Calling protocol assumed by :meth:`get_action`:

    * it is called once before every ball, with the outcome of the previous
      ball, and the returned action is played on the upcoming ball;
    * when the last wicket falls it is called exactly once more with that
      final outcome, and the action returned by that call is not played.

    NOTE(review): the second point is an assumption about the caller --
    confirm it against ``Environment.innings``.
    """

    NUM_ACTIONS = 6

    def __init__(self, num_balls, num_matches):
        """Create an agent for innings of ``num_balls`` balls.

        Args:
            num_balls: Maximum number of balls in one innings.
            num_matches: Number of matches to be played; stored but not used
                by the learning rule.
        """
        self.total_wickets = 4
        self.num_balls = num_balls
        self.num_matches = num_matches
        # One action-value per (balls bowled, wickets lost, action).
        self.q_values = np.zeros(
            (self.num_balls, self.total_wickets, self.NUM_ACTIONS),
            dtype=np.float32,
        )

        self.epsilon = 0.1  # exploration rate for the e-greedy policy
        self.alpha = 0.1  # learning rate
        self.gamma = 0.9  # discount factor

        self.policy_type = "e-greedy"  # e-greedy, softmax
        self.algo = "sarsa"  # q-learning, sarsa

        self.balls = 0
        self.wickets = 0
        # ``last_action is None`` marks "no innings in progress".
        self.last_action = None
        self.next_action = None

    def policy(self):
        """Sample an action for the current ``(balls, wickets)`` state.

        Returns:
            An action index in ``range(6)``.

        Raises:
            ValueError: If ``policy_type`` is neither ``"e-greedy"`` nor
                ``"softmax"``.
        """
        action_values = self.q_values[self.balls][self.wickets]

        if self.policy_type == "e-greedy":
            if np.random.rand() < self.epsilon:
                return np.random.randint(0, self.NUM_ACTIONS)
            return np.argmax(action_values)

        if self.policy_type == "softmax":
            # Subtracting the maximum leaves the distribution unchanged but
            # keeps exp() from overflowing to inf (and the ratio to NaN).
            shifted = action_values.astype(np.float64)
            shifted -= shifted.max()
            weights = np.exp(shifted)
            return np.random.choice(self.NUM_ACTIONS, p=weights / weights.sum())

        raise ValueError(f"unknown policy_type: {self.policy_type!r}")

    def _begin_innings(self):
        """Reset the per-innings state; no action has been chosen yet."""
        self.balls = 0
        self.wickets = 0
        self.last_action = None
        self.next_action = None

    def _learn(self, state, action, reward, bootstrap):
        """Apply one TD update to ``Q(state, action)``.

        ``bootstrap`` is the value estimate of the successor state and must
        be 0 for a transition that ends the innings.
        """
        ball, lost = state
        current = float(self.q_values[ball, lost, action])
        td_error = reward + self.gamma * float(bootstrap) - current
        self.q_values[ball, lost, action] += self.alpha * td_error

    def get_action(self, wicket, runs_scored):
        """Learn from the previous ball and choose the next action.

        Args:
            wicket: 1 if the previous ball cost a wicket, else 0.  Ignored on
                the first ball of an innings.
            runs_scored: Runs scored off the previous ball.  Ignored on the
                first ball of an innings.

        Returns:
            The action index chosen for the upcoming ball.

        Raises:
            ValueError: If ``algo`` is neither ``"sarsa"`` nor
                ``"q-learning"``.
        """
        if self.last_action is None:
            # First ball of an innings: there is no previous transition, and
            # the arguments may still describe the previous innings.
            self.balls = 0
            self.wickets = 0
            self.last_action = self.policy()
            return self.last_action

        previous_state = (self.balls, self.wickets)
        reward = runs_scored - 0.5
        self.balls += 1
        self.wickets += wicket

        all_out = self.wickets >= self.total_wickets
        out_of_balls = self.balls >= self.num_balls
        if all_out or out_of_balls:
            # The innings is over: learn the final transition with no
            # bootstrap term, then start counting from zero again.
            self._learn(previous_state, self.last_action, reward, 0.0)
            self._begin_innings()
            action = self.policy()
            if not all_out:
                # The ball limit was reached without a separate end-of-innings
                # call, so this call is already the first ball of the next
                # innings and its action will be played.
                self.last_action = action
            return action

        successor_values = self.q_values[self.balls][self.wickets]
        if self.algo == "sarsa":
            # On-policy: bootstrap from the action that will actually be played.
            self.next_action = self.policy()
            bootstrap = successor_values[self.next_action]
        elif self.algo == "q-learning":
            # Off-policy: bootstrap from the greedy value.
            bootstrap = np.max(successor_values)
        else:
            raise ValueError(f"unknown algo: {self.algo!r}")

        self._learn(previous_state, self.last_action, reward, bootstrap)

        if self.algo == "q-learning":
            # Choose after the update so the fresh estimate is used.
            self.next_action = self.policy()

        self.last_action = self.next_action
        return self.last_action

https://colab.research.google.com/drive/1_qSwQRhp60THO4Tt1TbNkUKWXDEbqrI0#printMode=true 2/4
5/12/24, 8:37 AM Q4.ipynb - Colab
wickets = 4  # wickets in hand at the start of every innings


class Environment:
    """Ball-by-ball innings simulator that queries an agent for each shot.

    Before every ball the agent's ``get_action(wicket, runs_scored)`` is
    called with the outcome of the previous ball and must return an index
    into the six-entry outcome tables below.
    """

    def __init__(self, num_balls, agent):
        """Store the innings length and the agent, and build the shot model.

        Args:
            num_balls: Maximum number of balls per innings.
            agent: Object exposing ``get_action(wicket, runs_scored)``.
        """
        self.num_balls = num_balls
        self.agent = agent
        # Outcome model, indexed by action.
        self.__p_out = np.array([0.001, 0.01, 0.02, 0.03, 0.1, 0.3])  # P(wicket)
        self.__p_run = np.array([1, 0.9, 0.85, 0.8, 0.75, 0.7])  # P(runs | no wicket)
        self.__action_runs_map = np.array([0, 1, 2, 3, 4, 6])  # runs when the shot succeeds
        self.__batting_order = np.array([0, 1, 2, 3])  # not read anywhere in this class
        self.__reset_innings_state()

    def __reset_innings_state(self):
        """Put every per-innings counter back to its starting value."""
        self.__wickets_left = wickets
        self.__wicket = 0
        self.__runs_scored = 0
        self.__total_runs = 0
        self.__total_wickets = 0
        self.__run_time = 0
        self.__start_time = 0
        self.__end_time = 0

    def __get_action(self):
        """Ask the agent for an action and add the time it took to the total."""
        # perf_counter is monotonic and high-resolution, which suits timing
        # short intervals better than the wall clock.
        self.__start_time = time.perf_counter()
        action = self.agent.get_action(self.__wicket, self.__runs_scored)
        self.__end_time = time.perf_counter()
        self.__run_time = self.__run_time + self.__end_time - self.__start_time
        return action

    def __get_outcome(self, action):
        """Sample the result of playing ``action`` on one ball.

        Returns:
            ``(wicket, runs)``: ``wicket`` is 1 if the batter is out, else 0;
            ``runs`` is always 0 when a wicket falls.
        """
        pout = self.__p_out[action]
        prun = self.__p_run[action]
        wicket = np.random.choice(2, 1, p=[1 - pout, pout])[0]
        runs = 0
        if wicket == 0:
            shot_succeeds = np.random.choice(2, 1, p=[1 - prun, prun])[0]
            runs = self.__action_runs_map[action] * shot_succeeds
        return wicket, runs

    def innings(self):
        """Play one innings.

        Returns:
            ``(total_runs, total_wickets, run_time)`` where ``run_time`` is
            the accumulated time spent inside the agent's ``get_action``.
        """
        # Also clears the previous innings' last wicket flag so it is not
        # handed to the agent on the first ball of this innings.
        self.__reset_innings_state()

        for _ in range(self.num_balls):
            if self.__wickets_left <= 0:
                break  # all out: no further balls are bowled
            action = self.__get_action()
            self.__wicket, self.__runs_scored = self.__get_outcome(action)
            self.__total_runs = self.__total_runs + self.__runs_scored
            if self.__wicket > 0:
                self.__wickets_left = self.__wickets_left - 1
                self.__total_wickets = self.__total_wickets + self.__wicket

        if self.__wickets_left == 0:
            # Give the agent the outcome of the ball that ended the innings;
            # the action it returns is not played.
            self.__get_action()
        return self.__total_runs, self.__total_wickets, self.__run_time

https://colab.research.google.com/drive/1_qSwQRhp60THO4Tt1TbNkUKWXDEbqrI0#printMode=true 3/4
5/12/24, 8:37 AM Q4.ipynb - Colab
# Experiment set-up: one agent learns across all matches against one environment.
num_matches = 10000
num_balls = 60
agent = Agent(num_balls, num_matches)
environment = Environment(num_balls, agent)

# Per-match results, one row per match.
score, run_time, wicket = (np.zeros((num_matches, 1)) for _ in range(3))

# Scores collected since the last progress report.
last_100_avgs = []
for i in range(num_matches):
    score[i], wicket[i], run_time[i] = environment.innings()
    last_100_avgs.append(score[i])
    # The list is emptied after every report, so it holds 100 scores exactly
    # when a multiple of 100 matches has been played.
    if len(last_100_avgs) == 100:
        print("Match: ", i + 1, "Average: ", np.mean(last_100_avgs))
        last_100_avgs = []

Match: 100 Average: 78.11


Match: 200 Average: 85.15
Match: 300 Average: 84.2
Match: 400 Average: 84.91
Match: 500 Average: 85.31
Match: 600 Average: 87.86
Match: 700 Average: 80.53
Match: 800 Average: 82.99
Match: 900 Average: 85.48
Match: 1000 Average: 83.93
Match: 1100 Average: 81.83
Match: 1200 Average: 84.81
Match: 1300 Average: 89.8
Match: 1400 Average: 90.11
Match: 1500 Average: 86.59
Match: 1600 Average: 91.43
Match: 1700 Average: 84.69
Match: 1800 Average: 85.3
Match: 1900 Average: 94.17
Match: 2000 Average: 87.65
Match: 2100 Average: 87.38

https://colab.research.google.com/drive/1_qSwQRhp60THO4Tt1TbNkUKWXDEbqrI0#printMode=true 4/4

You might also like