|
| 1 | +"""Games, or Adversarial Search. (Chapter 6) |
| 2 | +
|
| 3 | +""" |
| 4 | + |
| 5 | +from utils import * |
| 6 | +import random |
| 7 | + |
| 8 | +#______________________________________________________________________________ |
| 9 | +# Minimax Search |
| 10 | + |
def minimax_decision(state, game):
    """Given a state in a game, calculate the best move by searching
    forward all the way to the terminal states. [Fig. 6.4]

    game must provide to_move(state), terminal_test(state),
    utility(state, player) and successors(state), the latter yielding
    (action, state) pairs.  Returns the action leading to the successor
    with the highest minimax value for the player to move in state; when
    several successors tie, the earliest one wins."""

    player = game.to_move(state)
    infinity = float('inf')

    def max_value(state):
        # Value of a node where `player` picks the move.
        if game.terminal_test(state):
            return game.utility(state, player)
        best = -infinity
        for _, successor in game.successors(state):
            best = max(best, min_value(successor))
        return best

    def min_value(state):
        # Value of a node where the opponent picks the move.
        if game.terminal_test(state):
            return game.utility(state, player)
        worst = infinity
        for _, successor in game.successors(state):
            worst = min(worst, max_value(successor))
        return worst

    # Body of minimax_decision starts here:
    # Each root successor is scored exactly once; the key takes the whole
    # (action, state) pair because tuple-unpacking lambdas are not valid
    # Python 3 syntax.
    action, _ = max(game.successors(state),
                    key=lambda pair: min_value(pair[1]))
    return action
| 37 | + |
| 38 | + |
| 39 | +#______________________________________________________________________________ |
| 40 | + |
def alphabeta_full_search(state, game):
    """Search game to determine best action; use alpha-beta pruning.
    As in [Fig. 6.7], this version searches all the way to the leaves.

    game must provide to_move(state), terminal_test(state),
    utility(state, player) and successors(state), the latter yielding
    (action, state) pairs.  Returns the action leading to the best-valued
    successor for the player to move in state; ties go to the earliest
    successor."""

    player = game.to_move(state)
    infinity = float('inf')

    def max_value(state, alpha, beta):
        if game.terminal_test(state):
            return game.utility(state, player)
        best = -infinity
        for _, successor in game.successors(state):
            best = max(best, min_value(successor, alpha, beta))
            if best >= beta:
                # The minimizing ancestor already has an option at least
                # this good for it, so the remaining moves cannot matter.
                return best
            alpha = max(alpha, best)
        return best

    def min_value(state, alpha, beta):
        if game.terminal_test(state):
            return game.utility(state, player)
        worst = infinity
        for _, successor in game.successors(state):
            worst = min(worst, max_value(successor, alpha, beta))
            if worst <= alpha:
                # The maximizing ancestor already has an option at least
                # this good for it, so the remaining moves cannot matter.
                return worst
            beta = min(beta, worst)
        return worst

    # Body of alphabeta_full_search starts here:
    # Every root successor is searched with a fresh (-inf, +inf) window.
    action, _ = max(game.successors(state),
                    key=lambda pair: min_value(pair[1], -infinity, infinity))
    return action
| 73 | + |
def alphabeta_search(state, game, d=4, cutoff_test=None, eval_fn=None):
    """Search game to determine best action; use alpha-beta pruning.
    This version cuts off search and uses an evaluation function.

    d           -- depth bound used by the default cutoff test.
    cutoff_test -- callable (state, depth) -> bool; when true the state is
                   scored with eval_fn instead of being expanded.  The
                   default cuts off when depth > d or the state is terminal.
    eval_fn     -- callable (state) -> number; defaults to the game's
                   utility for the player to move in the root state.

    The successors of the root are searched starting at depth 0.  Returns
    the action leading to the best-valued successor; ties go to the
    earliest successor."""

    player = game.to_move(state)
    infinity = float('inf')

    # Resolve the defaults first so the helpers below only ever see callables.
    cutoff_test = (cutoff_test or
                   (lambda state, depth: depth > d or game.terminal_test(state)))
    eval_fn = eval_fn or (lambda state: game.utility(state, player))

    def max_value(state, alpha, beta, depth):
        if cutoff_test(state, depth):
            return eval_fn(state)
        best = -infinity
        for _, successor in game.successors(state):
            best = max(best, min_value(successor, alpha, beta, depth + 1))
            if best >= beta:
                # Beta cutoff: the minimizer above will never allow this line.
                return best
            alpha = max(alpha, best)
        return best

    def min_value(state, alpha, beta, depth):
        if cutoff_test(state, depth):
            return eval_fn(state)
        worst = infinity
        for _, successor in game.successors(state):
            worst = min(worst, max_value(successor, alpha, beta, depth + 1))
            if worst <= alpha:
                # Alpha cutoff: the maximizer above will never allow this line.
                return worst
            beta = min(beta, worst)
        return worst

    # Body of alphabeta_search starts here:
    action, _ = max(game.successors(state),
                    key=lambda pair: min_value(pair[1], -infinity, infinity, 0))
    return action
| 110 | + |
| 111 | +#______________________________________________________________________________ |
| 112 | +# Players for Games |
| 113 | + |
def query_player(game, state):
    """Make a move by querying standard input.

    Displays state through game.display, prompts with 'Your move? ' and
    returns the typed text after passing it through num_or_str (imported
    from utils)."""
    game.display(state)
    try:
        read_line = raw_input  # Python 2 name for the line reader
    except NameError:
        read_line = input      # Python 3 renamed raw_input to input
    return num_or_str(read_line('Your move? '))
| 118 | + |
def random_player(game, state):
    """A player that chooses a legal move at random.

    Picks uniformly among game.legal_moves(state); random.choice raises
    IndexError when that list is empty."""
    # legal_moves needs the current state; calling it with no argument
    # raised TypeError.
    return random.choice(game.legal_moves(state))
| 122 | + |
def alphabeta_player(game, state):
    "A player that picks its move with alphabeta_search at its default settings."
    chosen_move = alphabeta_search(state, game)
    return chosen_move
| 125 | + |
def play_game(game, *players):
    """Play an n-person, move-alternating game.

    Each player is a callable (game, state) -> move.  Players take turns
    in the order given, starting from game.initial, until a terminal
    state is reached.  Returns the utility of that terminal state for the
    player who moves first, i.e. game.to_move(game.initial)."""
    state = game.initial
    # utility expects the game's own player label, not the strategy
    # callable, so ask the game who is to move in the initial state.
    first_to_move = game.to_move(state)
    while True:
        for player in players:
            move = player(game, state)
            state = game.make_move(move, state)
            if game.terminal_test(state):
                return game.utility(state, first_to_move)
| 135 | + |
| 136 | +#______________________________________________________________________________ |
| 137 | +# Some Sample Games |
| 138 | + |
class Game:
    """A game is similar to a problem, but it has a utility for each
    state and a terminal test instead of a path cost and a goal
    test. To create a game, subclass this class and implement
    legal_moves, make_move, utility, and terminal_test. You may
    override display and successors or you can inherit their default
    methods. You will also need to set the .initial attribute to the
    initial state; this can be done in the constructor.

    The three methods without a default raise NotImplementedError, so a
    subclass that forgets one fails loudly instead of returning None."""

    def legal_moves(self, state):
        "Return a list of the allowable moves at this point."
        raise NotImplementedError

    def make_move(self, move, state):
        "Return the state that results from making a move from a state."
        raise NotImplementedError

    def utility(self, state, player):
        "Return the value of this final state to player."
        raise NotImplementedError

    def terminal_test(self, state):
        "Return True if this is a final state for the game."
        # Default: a state with no legal moves is final.
        return not self.legal_moves(state)

    def to_move(self, state):
        "Return the player whose move it is in this state."
        return state.to_move

    def display(self, state):
        "Print or otherwise display the state."
        # Parenthesized form prints the same text on Python 2 and Python 3.
        print(state)

    def successors(self, state):
        "Return a list of legal (move, state) pairs."
        return [(move, self.make_move(move, state))
                for move in self.legal_moves(state)]

    def __repr__(self):
        return '<%s>' % self.__class__.__name__
| 179 | + |
class Fig62Game(Game):
    """The game represented in [Fig. 6.2]. Serves as a simple test case.
    States are the node labels of a fixed two-ply tree; utilities are
    stored from MAX's point of view.
    >>> g = Fig62Game()
    >>> minimax_decision('A', g)
    'a1'
    >>> alphabeta_full_search('A', g)
    'a1'
    >>> alphabeta_search('A', g)
    'a1'
    """
    # (move, resulting state) pairs for every interior node of the tree.
    succs = {'A': [('a1', 'B'), ('a2', 'C'), ('a3', 'D')],
             'B': [('b1', 'B1'), ('b2', 'B2'), ('b3', 'B3')],
             'C': [('c1', 'C1'), ('c2', 'C2'), ('c3', 'C3')],
             'D': [('d1', 'D1'), ('d2', 'D2'), ('d3', 'D3')]}
    # Leaf values as seen by MAX.
    utils = Dict(B1=3, B2=12, B3=8, C1=2, C2=4, C3=6, D1=14, D2=5, D3=2)
    initial = 'A'

    def successors(self, state):
        "Return the (move, state) pairs below state; none for a leaf."
        return self.succs.get(state, [])

    def utility(self, state, player):
        "Return the leaf value for MAX, or its negation for any other player."
        value = self.utils[state]
        if player != 'MAX':
            value = -value
        return value

    def terminal_test(self, state):
        "Every state other than the four interior nodes is a leaf."
        return state not in ('A', 'B', 'C', 'D')

    def to_move(self, state):
        "MIN moves at B, C and D; MAX moves otherwise."
        return 'MIN' if state in 'BCD' else 'MAX'
| 211 | + |
class TicTacToe(Game):
    """Play TicTacToe on an h x v board, with Max (first player) playing 'X'.
    A state has the player to move, a cached utility, a list of moves in
    the form of a list of (x, y) positions, and a board, in the form of
    a dict of {(x, y): Player} entries, where Player is 'X' or 'O'.
    Coordinates run from 1 to h and from 1 to v; k is the number of marks
    in a line needed to win."""

    def __init__(self, h=3, v=3, k=3):
        update(self, h=h, v=v, k=k)
        moves = [(x, y) for x in range(1, h + 1)
                 for y in range(1, v + 1)]
        self.initial = Struct(to_move='X', utility=0, board={}, moves=moves)

    def legal_moves(self, state):
        "Legal moves are any square not yet taken."
        return state.moves

    def make_move(self, move, state):
        "Return the state after the player to move marks the square move."
        if move not in state.moves:
            return state  # Illegal move has no effect
        mover = state.to_move
        # Copy rather than mutate, so earlier states stay valid for search.
        board = state.board.copy()
        board[move] = mover
        moves = list(state.moves)
        moves.remove(move)
        return Struct(to_move=('O' if mover == 'X' else 'X'),
                      utility=self.compute_utility(board, move, mover),
                      board=board, moves=moves)

    def utility(self, state, player='X'):
        """Return the value to player; 1 for win, -1 for loss, 0 otherwise.
        The cached state.utility is from X's point of view, so it is
        negated for any other player.  player defaults to 'X' so that
        one-argument calls keep returning the value to X."""
        if player == 'X':
            return state.utility
        return -state.utility

    def terminal_test(self, state):
        "A state is terminal if it is won or there are no empty squares."
        return state.utility != 0 or len(state.moves) == 0

    def display(self, state):
        "Print the board, one line per x value, with '.' for empty squares."
        board = state.board
        for x in range(1, self.h + 1):
            print(' '.join(str(board.get((x, y), '.'))
                           for y in range(1, self.v + 1)))

    def compute_utility(self, board, move, player):
        "If X wins with this move, return 1; if O return -1; else return 0."
        # Row, column and both diagonals through the square just played.
        directions = ((0, 1), (1, 0), (1, -1), (1, 1))
        for delta in directions:
            if self.k_in_row(board, move, player, delta):
                return +1 if player == 'X' else -1
        return 0

    def k_in_row(self, board, move, player, delta):
        """Return true if there is a line through move on board for player.
        delta is a (delta_x, delta_y) step; the line is followed in that
        direction and in the opposite one."""
        delta_x, delta_y = delta
        x, y = move
        n = 0  # n is number of moves in row
        while board.get((x, y)) == player:
            n += 1
            x, y = x + delta_x, y + delta_y
        x, y = move
        while board.get((x, y)) == player:
            n += 1
            x, y = x - delta_x, y - delta_y
        n -= 1  # Because we counted move itself twice
        return n >= self.k
| 274 | + |
class ConnectFour(TicTacToe):
    """A TicTacToe-like game in which you can only make a move on the bottom
    row, or in a square directly above an occupied square.  Traditionally
    played on a 7x6 board and requiring 4 in a row."""

    def __init__(self, h=7, v=6, k=4):
        TicTacToe.__init__(self, h, v, k)

    def legal_moves(self, state):
        """Legal moves are open squares on the bottom row or directly
        above an occupied square."""
        # Rows are numbered from 1, so the bottom row is y == 1; testing
        # y == 0 left the initial position with no legal moves at all.
        return [(x, y) for (x, y) in state.moves
                if y == 1 or (x, y - 1) in state.board]
0 commit comments