import random

from agents import Agent
from probability import BayesNet
from utils4e import vector_add, weighted_sample_with_replacement

# Making Simple Decisions (Chapter 16)


class DecisionNetwork(BayesNet):
    """An abstract class for a decision network as a wrapper for a BayesNet.
    Represents an agent's current state, its possible actions, reachable states
    and utilities of those states."""

    def __init__(self, action, infer):
        """action: a single action node
        infer: the preferred method to carry out inference on the given BayesNet"""
        super().__init__()
        self.action = action
        self.infer = infer

    def best_action(self):
        """Return the best action in the network"""
        return self.action

    def get_utility(self, action, state):
        """Return the utility for a particular action and state in the network"""
        raise NotImplementedError

    def get_expected_utility(self, action, evidence):
        """Compute the expected utility given an action and evidence"""
        u = 0.0
        prob_dist = self.infer(action, evidence, self).prob
        for item, p in prob_dist.items():
            u += p * self.get_utility(action, item)

        return u
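

# A minimal sketch (illustrative, not from the book) of how DecisionNetwork
# might be subclassed: the hypothetical UmbrellaNetwork scores each
# (action, state) pair with a fixed lookup table.
class UmbrellaNetwork(DecisionNetwork):
    """Toy network: decide whether to take an umbrella given a rain forecast."""

    def __init__(self, action, infer, utility_table):
        super().__init__(action, infer)
        # utility_table maps (action, state) pairs to numeric utilities,
        # e.g. {('take', 'rain'): 70, ('leave', 'rain'): 0, ...}
        self.utility_table = utility_table

    def get_utility(self, action, state):
        return self.utility_table[(action, state)]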


class InformationGatheringAgent(Agent):
    """A simple information gathering agent. The agent works by repeatedly selecting
    the observation with the highest information value, until the cost of the next
    observation is greater than its expected benefit. [Figure 16.9]"""

    def __init__(self, decnet, infer, initial_evidence=None):
        """decnet: a decision network
        infer: the preferred method to carry out inference on the given decision network
        initial_evidence: initial evidence"""
        self.decnet = decnet
        self.infer = infer
        self.observation = initial_evidence or []
        self.variables = self.decnet.nodes

    def integrate_percept(self, percept):
        """Integrate the given percept into the decision network"""
        raise NotImplementedError

    def execute(self, percept):
        """Execute the information gathering algorithm"""
        self.observation = self.integrate_percept(percept)
        vpis = self.vpi_cost_ratio(self.variables)
        # index of the variable with the best VPI-to-cost ratio
        j = vpis.index(max(vpis))
        variable = self.variables[j]

        if self.vpi(variable) > self.cost(variable):
            return self.request(variable)

        return self.decnet.best_action()

    def request(self, variable):
        """Return the value of the given random variable as the next percept"""
        raise NotImplementedError

    def cost(self, var):
        """Return the cost of obtaining evidence through tests, consultants or questions"""
        raise NotImplementedError

    def vpi_cost_ratio(self, variables):
        """Return the VPI-to-cost ratio for each of the given variables"""
        return [self.vpi(var) / self.cost(var) for var in variables]

    def vpi(self, variable):
        """Return the value of perfect information (VPI) for a given variable:
        the expected utility averaged over the variable's possible observed
        values, minus the expected utility under the current evidence alone."""
        vpi = 0.0
        prob_dist = self.infer(variable, self.observation, self.decnet).prob
        for item, post_prob in prob_dist.items():
            new_observation = list(self.observation)
            new_observation.append(item)
            expected_utility = self.decnet.get_expected_utility(variable, new_observation)
            vpi += post_prob * expected_utility

        vpi -= self.decnet.get_expected_utility(variable, self.observation)
        return vpi
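

# A minimal sketch (assumed for illustration, not from the book) of a concrete
# agent: every observation has a flat unit cost, percepts are appended to the
# evidence list, and request() reads values from a hypothetical oracle callable.
class UnitCostGatheringAgent(InformationGatheringAgent):
    def __init__(self, decnet, infer, oracle, initial_evidence=None):
        super().__init__(decnet, infer, initial_evidence)
        self.oracle = oracle  # callable mapping a variable to its observed value

    def integrate_percept(self, percept):
        return self.observation + [percept]

    def cost(self, var):
        return 1.0  # flat cost per test or question

    def request(self, variable):
        return self.oracle(variable)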


# _________________________________________________________________________
# Chapter 25: Robotics
# TODO: Implement continuous map for MonteCarlo similar to Fig 25.10 from the book


class MCLmap:
    """Map which provides probability distributions and sensor readings.
    Consists of discrete cells which are either an obstacle or empty"""

    def __init__(self, m):
        self.m = m
        self.nrows = len(m)
        self.ncols = len(m[0])
        # list of empty spaces in the map
        self.empty = [(i, j) for i in range(self.nrows) for j in range(self.ncols) if not m[i][j]]

    def sample(self):
        """Returns a random kinematic state possible in the map"""
        pos = random.choice(self.empty)
        # orientations: 0 North, 1 East, 2 South, 3 West
        orient = random.choice(range(4))
        kin_state = pos + (orient,)
        return kin_state

    def ray_cast(self, sensor_num, kin_state):
        """Returns distance to nearest obstacle or map boundary in the direction of sensor"""
        pos = kin_state[:2]
        orient = kin_state[2]
        # sensor layout when orientation is 0 (towards North)
        #  0
        # 3R1
        #  2
        # delta is the unit step along the sensor's direction
        delta = ((sensor_num % 2 == 0) * (sensor_num - 1), (sensor_num % 2 == 1) * (2 - sensor_num))
        # sensor direction changes based on orientation: one 90-degree clockwise
        # rotation per quarter-turn of the robot
        for _ in range(orient):
            delta = (delta[1], -delta[0])
        range_count = 0
        while (0 <= pos[0] < self.nrows) and (0 <= pos[1] < self.ncols) and (not self.m[pos[0]][pos[1]]):
            pos = vector_add(pos, delta)
            range_count += 1
        return range_count
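

# A small runnable example (illustrative, not from the book) showing how
# MCLmap and ray_cast behave on a hand-built grid.
def mclmap_demo():
    """Ray cast on a 4x4 grid with a single obstacle at (1, 1);
    0 marks an empty cell, 1 an obstacle.

    >>> mclmap_demo()
    2
    """
    grid = [[0, 0, 0, 0],
            [0, 1, 0, 0],
            [0, 0, 0, 0],
            [0, 0, 0, 0]]
    mcl_map = MCLmap(grid)
    # robot at row 3, column 1, facing North (orientation 0); sensor 0 looks ahead
    return mcl_map.ray_cast(0, (3, 1, 0))  # two cells until the obstacle at (1, 1)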


def monte_carlo_localization(a, z, N, P_motion_sample, P_sensor, m, S=None):
    """Monte Carlo localization algorithm from Fig 25.9"""

    def ray_cast(sensor_num, kin_state, m):
        return m.ray_cast(sensor_num, kin_state)

    M = len(z)
    S_ = [0] * N
    W_ = [0] * N
    v = a['v']
    w = a['w']

    if S is None:
        S = [m.sample() for _ in range(N)]

    for i in range(N):
        # motion update: sample each particle's successor state
        S_[i] = P_motion_sample(S[i], v, w)
        # sensor update: weight the particle by the likelihood of each range reading
        W_[i] = 1
        for j in range(M):
            z_ = ray_cast(j, S_[i], m)
            W_[i] = W_[i] * P_sensor(z[j], z_)

    S = weighted_sample_with_replacement(N, S_, W_)
    return S
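

# A runnable sketch of calling monte_carlo_localization. The motion and sensor
# models below are assumptions made for illustration, not the book's models:
# motion is deterministic (turn w quarter-turns, then step v cells forward),
# and the sensor likelihood simply decays with the range error.
def mcl_demo(N=50):
    m = MCLmap([[0, 0, 0],
                [0, 1, 0],
                [0, 0, 0]])

    def P_motion_sample(kin_state, v, w):
        """Noiseless motion model: rotate by w quarter-turns, move v cells."""
        orient = (kin_state[2] + w) % 4
        # unit steps scaled by v for orientations 0 N, 1 E, 2 S, 3 W
        step = [(-v, 0), (0, v), (v, 0), (0, -v)][orient]
        return vector_add(kin_state[:2], step) + (orient,)

    def P_sensor(z, z_):
        """Likelihood of reading z when the predicted range is z_."""
        return 1.0 / (1.0 + abs(z - z_))

    a = {'v': 0, 'w': 0}  # stand still for one step
    z = [m.ray_cast(i, (2, 0, 0)) for i in range(4)]  # readings from (2, 0), facing North
    return monte_carlo_localization(a, z, N, P_motion_sample, P_sensor, m)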