from Agenda
+ self.agenda.remove((G, act1))
+
+ # For actions with variable number of arguments, use least commitment principle
+ # act0_temp, bindings = self.find_action_for_precondition(G)
+ # act0 = self.generate_action_object(act0_temp, bindings)
+
+ # Actions = Actions U {act0}
+ self.actions.add(act0)
+
+ # Constraints = add_const(start < act0, Constraints)
+ self.constraints = self.add_const((self.start, act0), self.constraints)
+
+ # for each CL E CausalLinks do
+ # Constraints = protect(CL, act0, Constraints)
+ for causal_link in self.causal_links:
+ self.constraints = self.protect(causal_link, act0, self.constraints)
+
+ # Agenda = Agenda U {<P, act0>: P is a precondition of act0}
+ for precondition in act0.precond:
+ self.agenda.add((precondition, act0))
+
+ # Constraints = add_const(act0 < act1, Constraints)
+ self.constraints = self.add_const((act0, act1), self.constraints)
+
+ # CausalLinks U {<act0, G, act1>}
+ if (act0, G, act1) not in self.causal_links:
+ self.causal_links.append((act0, G, act1))
+
+ # for each A E Actions do
+ # Constraints = protect(<act0, G, act1>, A, Constraints)
+ for action in self.actions:
+ self.constraints = self.protect((act0, G, act1), action, self.constraints)
+
+ if step > 200:
+ print("Couldn't find a solution")
+ return None, None
+
+ if display:
+ self.display_plan()
+ else:
+ return self.constraints, self.causal_links
+
+
+def spare_tire_graphPlan():
+ """Solves the spare tire problem using GraphPlan"""
+ return GraphPlan(spare_tire()).execute()
+
+
+def three_block_tower_graphPlan():
+ """Solves the Sussman Anomaly problem using GraphPlan"""
+ return GraphPlan(three_block_tower()).execute()
+
+
+def air_cargo_graphPlan():
+ """Solves the air cargo problem using GraphPlan"""
+ return GraphPlan(air_cargo()).execute()
+
+
+def have_cake_and_eat_cake_too_graphPlan():
+ """Solves the cake problem using GraphPlan"""
+ return [GraphPlan(have_cake_and_eat_cake_too()).execute()[1]]
+
+
+def shopping_graphPlan():
+ """Solves the shopping problem using GraphPlan"""
+ return GraphPlan(shopping_problem()).execute()
+
+
+def socks_and_shoes_graphPlan():
+ """Solves the socks and shoes problem using GraphPlan"""
+ return GraphPlan(socks_and_shoes()).execute()
+
+
+def simple_blocks_world_graphPlan():
+ """Solves the simple blocks world problem"""
+ return GraphPlan(simple_blocks_world()).execute()
+
+
+class HLA(Action):
+ """
+ Define actions for the real world (that may be refined further) and that satisfy
+ resource constraints.
+ """
+ unique_group = 1
+
+ def __init__(self, action, precond=None, effect=None, duration=0, consume=None, use=None):
+ """
+ Unlike regular Actions, HLAs are defined with the following additional constraints:
+ duration holds the amount of time required to execute the task
+ consumes holds a dictionary representing the resources the task consumes
+ uses holds a dictionary representing the resources the task uses
+ """
+ precond = precond or [None]
+ effect = effect or [None]
+ super().__init__(action, precond, effect)
+ self.duration = duration
+ self.consumes = consume or {}
+ self.uses = use or {}
+ self.completed = False
+ # self.priority = -1 # must be assigned in relation to other HLAs
+ # self.job_group = -1 # must be assigned in relation to other HLAs
+
+ def do_action(self, job_order, available_resources, kb, args):
+ """
+ An HLA-based version of act: in addition to updating the knowledge base, it performs
+ resource checks and ensures that the actions are executed in the correct order.
+ """
+ if not self.has_usable_resource(available_resources):
+ raise Exception('Not enough usable resources to execute {}'.format(self.name))
+ if not self.has_consumable_resource(available_resources):
+ raise Exception('Not enough consumable resources to execute {}'.format(self.name))
+ if not self.inorder(job_order):
+ raise Exception("Can't execute {} - execute prerequisite actions first".
+ format(self.name))
+ kb = super().act(kb, args) # update knowledge base
+ for resource in self.consumes: # remove consumed resources
+ available_resources[resource] -= self.consumes[resource]
+ self.completed = True # set the task status to complete
+ return kb
+
+ def has_consumable_resource(self, available_resources):
+ """
+ Ensure there are enough consumable resources for this action to execute.
+ """
+ for resource in self.consumes:
+ if available_resources.get(resource) is None:
+ return False
+ if available_resources[resource] < self.consumes[resource]:
+ return False
+ return True
+
+ def has_usable_resource(self, available_resources):
+ """
+ Ensure there are enough usable resources for this action to execute.
+ """
+ for resource in self.uses:
+ if available_resources.get(resource) is None:
+ return False
+ if available_resources[resource] < self.uses[resource]:
+ return False
+ return True
+
+ def inorder(self, job_order):
+ """
+ Ensure that all the jobs that had to be executed before the current one have been
+ successfully executed.
+ """
+ for jobs in job_order:
+ if self in jobs:
+ for job in jobs:
+ if job is self:
+ return True
+ if not job.completed:
+ return False
+ return True
+
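+ # A minimal usage sketch (illustrative only, not part of the library): an HLA with
+ # resource constraints, checked against a hypothetical pool of available resources.
+ #
+ #     paint = HLA('Paint(C1)', precond='~Painted(C1)', effect='Painted(C1)',
+ #                 duration=15, use={'PaintBooths': 1}, consume={'PaintCans': 2})
+ #     paint.has_usable_resource({'PaintBooths': 1, 'PaintCans': 1})      # True
+ #     paint.has_consumable_resource({'PaintBooths': 1, 'PaintCans': 1})  # False, needs 2 cans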
+
+class RealWorldPlanningProblem(PlanningProblem):
+ """
+ Define real-world problems by aggregating resources as numerical quantities instead of
+ named entities.
+
+ This class is identical to PlanningProblem, except that it overloads the act function to handle
+ resource and ordering conditions imposed by HLA as opposed to Action.
+ """
+
+ def __init__(self, initial, goals, actions, jobs=None, resources=None):
+ super().__init__(initial, goals, actions)
+ self.jobs = jobs
+ self.resources = resources or {}
+
+ def act(self, action):
+ """
+ Performs the HLA given as argument.
+
+ Note that this is different from the superclass act, where the parameter was an
+ Expression. For real-world problems, an Expr object isn't enough to capture all the
+ detail required for executing the action: resources, preconditions, etc. also need
+ to be checked.
+ """
+ args = action.args
+ list_action = first(a for a in self.actions if a.name == action.name)
+ if list_action is None:
+ raise Exception("Action '{}' not found".format(action.name))
+ self.initial = list_action.do_action(self.jobs, self.resources, self.initial, args).clauses
+
+ def refinements(self, library): # refinements may be (multiple) HLA themselves ...
+ """
+ State is a Problem, containing the current state kb library is a
+ dictionary containing details for every possible refinement. e.g.:
+ {
+ 'HLA': [
+ 'Go(Home, SFO)',
+ 'Go(Home, SFO)',
+ 'Drive(Home, SFOLongTermParking)',
+ 'Shuttle(SFOLongTermParking, SFO)',
+ 'Taxi(Home, SFO)'
+ ],
+ 'steps': [
+ ['Drive(Home, SFOLongTermParking)', 'Shuttle(SFOLongTermParking, SFO)'],
+ ['Taxi(Home, SFO)'],
+ [],
+ [],
+ []
+ ],
+ # empty refinements indicate a primitive action
+ 'precond': [
+ ['At(Home) & Have(Car)'],
+ ['At(Home)'],
+ ['At(Home) & Have(Car)'],
+ ['At(SFOLongTermParking)'],
+ ['At(Home)']
+ ],
+ 'effect': [
+ ['At(SFO) & ~At(Home)'],
+ ['At(SFO) & ~At(Home)'],
+ ['At(SFOLongTermParking) & ~At(Home)'],
+ ['At(SFO) & ~At(SFOLongTermParking)'],
+ ['At(SFO) & ~At(Home)']
+ ]}
+ """
+ indices = [i for i, x in enumerate(library['HLA']) if expr(x).op == self.name]
+ for i in indices:
+ actions = []
+ for j in range(len(library['steps'][i])):
+ # find the index of the step [j] of the HLA
+ index_step = [k for k, x in enumerate(library['HLA']) if x == library['steps'][i][j]][0]
+ precond = library['precond'][index_step][0] # preconditions of step [j]
+ effect = library['effect'][index_step][0] # effect of step [j]
+ actions.append(HLA(library['steps'][i][j], precond, effect))
+ yield actions
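+ # Illustrative note: with the example library above, refining a 'Go(Home, SFO)' HLA
+ # yields two candidate sequences, [Drive(Home, SFOLongTermParking), Shuttle(SFOLongTermParking, SFO)]
+ # and [Taxi(Home, SFO)].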
+
+ def hierarchical_search(self, hierarchy):
+ """
+ [Figure 11.5]
+ 'Hierarchical Search, a Breadth First Search implementation of Hierarchical
+ Forward Planning Search'
+ The problem is a real-world problem defined by the problem class, and the hierarchy is
+ a dictionary of HLA - refinements (see refinements generator for details)
+ """
+ act = Node(self.initial, None, [self.actions[0]])
+ frontier = deque()
+ frontier.append(act)
+ while True:
+ if not frontier:
+ return None
+ plan = frontier.popleft()
+ # finds the first non primitive hla in plan actions
+ (hla, index) = RealWorldPlanningProblem.find_hla(plan, hierarchy)
+ prefix = plan.action[:index]
+ outcome = RealWorldPlanningProblem(
+ RealWorldPlanningProblem.result(self.initial, prefix), self.goals, self.actions)
+ suffix = plan.action[index + 1:]
+ if not hla: # hla is None and plan is primitive
+ if outcome.goal_test():
+ return plan.action
+ else:
+ for sequence in RealWorldPlanningProblem.refinements(hla, hierarchy): # find refinements
+ frontier.append(Node(outcome.initial, plan, prefix + sequence + suffix))
+
+ def result(state, actions):
+ """The outcome of applying an action to the current problem"""
+ for a in actions:
+ if a.check_precond(state, a.args):
+ state = a(state, a.args).clauses
+ return state
+
+ def angelic_search(self, hierarchy, initial_plan):
+ """
+ [Figure 11.8]
+ A hierarchical planning algorithm that uses angelic semantics to identify and
+ commit to high-level plans that work while avoiding high-level plans that don’t.
+ The predicate MAKING-PROGRESS checks to make sure that we aren’t stuck in an infinite regression
+ of refinements.
+ At top level, call ANGELIC-SEARCH with [Act] as the initialPlan.
+
+ InitialPlan contains a sequence of HLA's with angelic semantics
+
+ The possible effects of an angelic HLA in initialPlan are:
+ ~ : effect remove
+ $+: effect possibly add
+ $-: effect possibly remove
+ $$: possibly add or remove
+ """
+ frontier = deque(initial_plan)
+ while True:
+ if not frontier:
+ return None
+ plan = frontier.popleft() # sequence of HLA/Angelic HLA's
+ opt_reachable_set = RealWorldPlanningProblem.reach_opt(self.initial, plan)
+ pes_reachable_set = RealWorldPlanningProblem.reach_pes(self.initial, plan)
+ if self.intersects_goal(opt_reachable_set):
+ if RealWorldPlanningProblem.is_primitive(plan, hierarchy):
+ return [x for x in plan.action]
+ guaranteed = self.intersects_goal(pes_reachable_set)
+ if guaranteed and RealWorldPlanningProblem.making_progress(plan, initial_plan):
+ final_state = guaranteed[0] # any element of guaranteed
+ return RealWorldPlanningProblem.decompose(hierarchy, plan, final_state, pes_reachable_set)
+ # there should be at least one HLA/AngelicHLA, otherwise plan would be primitive
+ hla, index = RealWorldPlanningProblem.find_hla(plan, hierarchy)
+ prefix = plan.action[:index]
+ suffix = plan.action[index + 1:]
+ outcome = RealWorldPlanningProblem(
+ RealWorldPlanningProblem.result(self.initial, prefix), self.goals, self.actions)
+ for sequence in RealWorldPlanningProblem.refinements(hla, hierarchy): # find refinements
+ frontier.append(
+ AngelicNode(outcome.initial, plan, prefix + sequence + suffix, prefix + sequence + suffix))
+
+ def intersects_goal(self, reachable_set):
+ """
+ Find the intersection of the reachable states and the goal
+ """
+ return [y for x in list(reachable_set.keys())
+ for y in reachable_set[x]
+ if all(goal in y for goal in self.goals)]
+
+ def is_primitive(plan, library):
+ """
+ Checks whether every HLA in the plan is a primitive action
+ """
+ for hla in plan.action:
+ indices = [i for i, x in enumerate(library['HLA']) if expr(x).op == hla.name]
+ for i in indices:
+ if library["steps"][i]:
+ return False
+ return True
+
+ def reach_opt(init, plan):
+ """
+ Finds the optimistic reachable set of the sequence of actions in plan
+ """
+ reachable_set = {0: [init]}
+ optimistic_description = plan.action # list of angelic actions with optimistic description
+ return RealWorldPlanningProblem.find_reachable_set(reachable_set, optimistic_description)
+
+ def reach_pes(init, plan):
+ """
+ Finds the pessimistic reachable set of the sequence of actions in plan
+ """
+ reachable_set = {0: [init]}
+ pessimistic_description = plan.action_pes # list of angelic actions with pessimistic description
+ return RealWorldPlanningProblem.find_reachable_set(reachable_set, pessimistic_description)
+
+ def find_reachable_set(reachable_set, action_description):
+ """
+ Finds the states reachable by applying each action in action_description to every state in reachable_set.
+ """
+ for i in range(len(action_description)):
+ reachable_set[i + 1] = []
+ if type(action_description[i]) is AngelicHLA:
+ possible_actions = action_description[i].angelic_action()
+ else:
+ possible_actions = action_description
+ for action in possible_actions:
+ for state in reachable_set[i]:
+ if action.check_precond(state, action.args):
+ if action.effect[0]:
+ new_state = action(state, action.args).clauses
+ reachable_set[i + 1].append(new_state)
+ else:
+ reachable_set[i + 1].append(state)
+ return reachable_set
+
+ def find_hla(plan, hierarchy):
+ """
+ Finds the first non-primitive HLA in plan.action
+ and its corresponding index in plan.action
+ """
+ hla = None
+ index = len(plan.action)
+ for i in range(len(plan.action)): # find the first HLA in plan, that is not primitive
+ if not RealWorldPlanningProblem.is_primitive(Node(plan.state, plan.parent, [plan.action[i]]), hierarchy):
+ hla = plan.action[i]
+ index = i
+ break
+ return hla, index
+
+ def making_progress(plan, initial_plan):
+ """
+ Prevents infinite regression of refinements.
+
+ (Infinite regression of refinements happens when the algorithm finds a plan whose
+ pessimistic reachable set intersects the goal inside a call to decompose on
+ the same plan, in the same circumstances.)
+ """
+ for i in range(len(initial_plan)):
+ if plan == initial_plan[i]:
return False
return True
- # Actions
+ def decompose(hierarchy, plan, s_f, reachable_set):
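+ """
+ Recursively reconstructs the sequence of actions that leads from the initial state
+ to the final state s_f, walking the pessimistic reachable set backwards and refining
+ each non-primitive action through a recursive call to angelic_search.
+ """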
+ solution = []
+ i = max(reachable_set.keys())
+ while plan.action_pes:
+ action = plan.action_pes.pop()
+ if i == 0:
+ return solution
+ s_i = RealWorldPlanningProblem.find_previous_state(s_f, reachable_set, i, action)
+ problem = RealWorldPlanningProblem(s_i, s_f, plan.action)
+ angelic_call = RealWorldPlanningProblem.angelic_search(problem, hierarchy,
+ [AngelicNode(s_i, Node(None), [action], [action])])
+ if angelic_call:
+ for x in angelic_call:
+ solution.insert(0, x)
+ else:
+ return None
+ s_f = s_i
+ i -= 1
+ return solution
+
+ def find_previous_state(s_f, reachable_set, i, action):
+ """
+ Given a final state s_f and an action, finds a state s_i in reachable_set
+ such that applying the action to s_i yields s_f.
+ """
+ s_i = reachable_set[i - 1][0]
+ for state in reachable_set[i - 1]:
+ if s_f in [x for x in RealWorldPlanningProblem.reach_pes(
+ state, AngelicNode(state, None, [action], [action]))[1]]:
+ s_i = state
+ break
+ return s_i
+
+
+def job_shop_problem():
+ """
+ [Figure 11.1] JOB-SHOP-PROBLEM
+
+ A job-shop scheduling problem for assembling two cars,
+ with resource and ordering constraints.
+
+ Example:
+ >>> from planning import *
+ >>> p = job_shop_problem()
+ >>> p.goal_test()
+ False
+ >>> p.act(p.jobs[1][0])
+ >>> p.act(p.jobs[1][1])
+ >>> p.act(p.jobs[1][2])
+ >>> p.act(p.jobs[0][0])
+ >>> p.act(p.jobs[0][1])
+ >>> p.goal_test()
+ False
+ >>> p.act(p.jobs[0][2])
+ >>> p.goal_test()
+ True
+ >>>
+ """
+ resources = {'EngineHoists': 1, 'WheelStations': 2, 'Inspectors': 2, 'LugNuts': 500}
+
+ add_engine1 = HLA('AddEngine1', precond='~Has(C1, E1)', effect='Has(C1, E1)', duration=30, use={'EngineHoists': 1})
+ add_engine2 = HLA('AddEngine2', precond='~Has(C2, E2)', effect='Has(C2, E2)', duration=60, use={'EngineHoists': 1})
+ add_wheels1 = HLA('AddWheels1', precond='~Has(C1, W1)', effect='Has(C1, W1)', duration=30, use={'WheelStations': 1},
+ consume={'LugNuts': 20})
+ add_wheels2 = HLA('AddWheels2', precond='~Has(C2, W2)', effect='Has(C2, W2)', duration=15, use={'WheelStations': 1},
+ consume={'LugNuts': 20})
+ inspect1 = HLA('Inspect1', precond='~Inspected(C1)', effect='Inspected(C1)', duration=10, use={'Inspectors': 1})
+ inspect2 = HLA('Inspect2', precond='~Inspected(C2)', effect='Inspected(C2)', duration=10, use={'Inspectors': 1})
+
+ actions = [add_engine1, add_engine2, add_wheels1, add_wheels2, inspect1, inspect2]
+
+ job_group1 = [add_engine1, add_wheels1, inspect1]
+ job_group2 = [add_engine2, add_wheels2, inspect2]
+
+ return RealWorldPlanningProblem(
+ initial='Car(C1) & Car(C2) & Wheels(W1) & Wheels(W2) & Engine(E2) & Engine(E2) & ~Has(C1, E1) & ~Has(C2, '
+ 'E2) & ~Has(C1, W1) & ~Has(C2, W2) & ~Inspected(C1) & ~Inspected(C2)',
+ goals='Has(C1, W1) & Has(C1, E1) & Inspected(C1) & Has(C2, W2) & Has(C2, E2) & Inspected(C2)',
+ actions=actions,
+ jobs=[job_group1, job_group2],
+ resources=resources)
+
+
+def go_to_sfo():
+ """Go to SFO Problem"""
+
+ go_home_sfo1 = HLA('Go(Home, SFO)', precond='At(Home) & Have(Car)', effect='At(SFO) & ~At(Home)')
+ go_home_sfo2 = HLA('Go(Home, SFO)', precond='At(Home)', effect='At(SFO) & ~At(Home)')
+ drive_home_sfoltp = HLA('Drive(Home, SFOLongTermParking)', precond='At(Home) & Have(Car)',
+ effect='At(SFOLongTermParking) & ~At(Home)')
+ shuttle_sfoltp_sfo = HLA('Shuttle(SFOLongTermParking, SFO)', precond='At(SFOLongTermParking)',
+ effect='At(SFO) & ~At(SFOLongTermParking)')
+ taxi_home_sfo = HLA('Taxi(Home, SFO)', precond='At(Home)', effect='At(SFO) & ~At(Home)')
+
+ actions = [go_home_sfo1, go_home_sfo2, drive_home_sfoltp, shuttle_sfoltp_sfo, taxi_home_sfo]
+
+ library = {
+ 'HLA': [
+ 'Go(Home, SFO)',
+ 'Go(Home, SFO)',
+ 'Drive(Home, SFOLongTermParking)',
+ 'Shuttle(SFOLongTermParking, SFO)',
+ 'Taxi(Home, SFO)'
+ ],
+ 'steps': [
+ ['Drive(Home, SFOLongTermParking)', 'Shuttle(SFOLongTermParking, SFO)'],
+ ['Taxi(Home, SFO)'],
+ [],
+ [],
+ []
+ ],
+ 'precond': [
+ ['At(Home) & Have(Car)'],
+ ['At(Home)'],
+ ['At(Home) & Have(Car)'],
+ ['At(SFOLongTermParking)'],
+ ['At(Home)']
+ ],
+ 'effect': [
+ ['At(SFO) & ~At(Home)'],
+ ['At(SFO) & ~At(Home)'],
+ ['At(SFOLongTermParking) & ~At(Home)'],
+ ['At(SFO) & ~At(SFOLongTermParking)'],
+ ['At(SFO) & ~At(Home)']]}
+
+ return RealWorldPlanningProblem(initial='At(Home)', goals='At(SFO)', actions=actions), library
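+
+
+ # A usage sketch (illustrative): the problem/library pair above can be fed to the
+ # hierarchical planner, e.g.
+ #
+ #     problem, library = go_to_sfo()
+ #     plan = problem.hierarchical_search(library)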
+
+
+class AngelicHLA(HLA):
+ """
+ Define actions for the real world (that may be refined further), under angelic semantics.
+ """
+
+ def __init__(self, action, precond, effect, duration=0, consume=None, use=None):
+ super().__init__(action, precond, effect, duration, consume, use)
+
+ def convert(self, clauses):
+ """
+ Converts strings into Exprs
+ An HLA with angelic semantics can achieve the effects of simple HLAs (add / remove a variable)
+ and furthermore can have the following effects on the variables:
+ Possibly add variable ( $+ )
+ Possibly remove variable ( $- )
+ Possibly add or remove a variable ( $$ )
+
+ Overrides HLA.convert function
+ """
+ lib = {'~': 'Not',
+ '$+': 'PosYes',
+ '$-': 'PosNot',
+ '$$': 'PosYesNot'}
+
+ if isinstance(clauses, Expr):
+ clauses = conjuncts(clauses)
+ for i in range(len(clauses)):
+ for ch in lib.keys():
+ if clauses[i].op == ch:
+ clauses[i] = expr(lib[ch] + str(clauses[i].args[0]))
+
+ elif isinstance(clauses, str):
+ for ch in lib.keys():
+ clauses = clauses.replace(ch, lib[ch])
+ if len(clauses) > 0:
+ clauses = expr(clauses)
+
+ try:
+ clauses = conjuncts(clauses)
+ except AttributeError:
+ pass
+
+ return clauses
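+ # Illustrative examples of convert (input string -> converted clause list):
+ #   convert('$+At(SFO) & $-At(Home)')  ->  [PosYesAt(SFO), PosNotAt(Home)]
+ #   convert('$$At(Home)')              ->  [PosYesNotAt(Home)]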
+
+ def angelic_action(self):
+ """
+ Converts a high-level action (HLA) with angelic semantics into all of the concrete HLAs it can correspond to.
+ An HLA with angelic semantics can achieve the effects of simple HLAs (add / remove a variable)
+ and furthermore can have the following effects for each variable:
+
+ Possibly add variable ( $+: 'PosYes' ) --> corresponds to two HLAs:
+ HLA_1: add variable
+ HLA_2: leave variable unchanged
- # Hit
- precond_pos = [expr("Approaching(Ball,loc)"), expr("At(actor,loc)")]
- precond_neg = []
- effect_add = [expr("Returned(Ball)")]
- effect_rem = []
- hit = Action(expr("Hit(actor, Ball)"), [precond_pos, precond_neg], [effect_add, effect_rem])
+ Possibly remove variable ( $-: 'PosNot' ) --> corresponds to two HLAs:
+ HLA_1: remove variable
+ HLA_2: leave variable unchanged
- # Go
- precond_pos = [expr("At(actor, loc)")]
- precond_neg = []
- effect_add = [expr("At(actor, to)")]
- effect_rem = [expr("At(actor, loc)")]
- go = Action(expr("Go(actor, to)"), [precond_pos, precond_neg], [effect_add, effect_rem])
+ Possibly add / remove a variable ( $$: 'PosYesNot' ) --> corresponds to three HLAs:
+ HLA_1: add variable
+ HLA_2: remove variable
+ HLA_3: leave variable unchanged
+
+
+ example: the angelic action with effects possibly add A and possibly add or remove B corresponds to the
+ following 6 effects of HLAs:
+
+
+ '$+A & $$B': HLA_1: 'A & B' (add A and add B)
+ HLA_2: 'A & ~B' (add A and remove B)
+ HLA_3: 'A' (add A)
+ HLA_4: 'B' (add B)
+ HLA_5: '~B' (remove B)
+ HLA_6: ' ' (no effect)
+
+ """
+
+ effects = [[]]
+ for clause in self.effect:
+ (n, w) = AngelicHLA.compute_parameters(clause)
+ effects = effects * n # create n copies of effects
+ it = range(1)
+ if len(effects) != 0:
+ # split effects into n sublists (to separate the n copies created above)
+ it = range(len(effects) // n)
+ for i in it:
+ if effects[i]:
+ if clause.args:
+ effects[i] = expr(str(effects[i]) + '&' + str(
+ Expr(clause.op[w:], clause.args[0]))) # make changes in the ith part of effects
+ if n == 3:
+ effects[i + len(effects) // 3] = expr(
+ str(effects[i + len(effects) // 3]) + '&' + str(Expr(clause.op[6:], clause.args[0])))
+ else:
+ effects[i] = expr(
+ str(effects[i]) + '&' + str(expr(clause.op[w:]))) # make changes in the ith part of effects
+ if n == 3:
+ effects[i + len(effects) // 3] = expr(
+ str(effects[i + len(effects) // 3]) + '&' + str(expr(clause.op[6:])))
+
+ else:
+ if clause.args:
+ effects[i] = Expr(clause.op[w:], clause.args[0]) # make changes in the ith part of effects
+ if n == 3:
+ effects[i + len(effects) // 3] = Expr(clause.op[6:], clause.args[0])
+
+ else:
+ effects[i] = expr(clause.op[w:]) # make changes in the ith part of effects
+ if n == 3:
+ effects[i + len(effects) // 3] = expr(clause.op[6:])
+
+ return [HLA(Expr(self.name, self.args), self.precond, effects[i]) for i in range(len(effects))]
+
+ def compute_parameters(clause):
+ """
+ computes n,w
+
+ n = number of HLA effects that the angelic HLA corresponds to
+ w = length of representation of angelic HLA effect
+
+ n = 1, if effect is add
+ n = 1, if effect is remove
+ n = 2, if effect is possibly add
+ n = 2, if effect is possibly remove
+ n = 3, if effect is possibly add or remove
+
+ """
+ if clause.op[:9] == 'PosYesNot':
+ # possibly add/remove variable: three possible effects for the variable
+ n = 3
+ w = 9
+ elif clause.op[:6] == 'PosYes': # possibly add variable: two possible effects for the variable
+ n = 2
+ w = 6
+ elif clause.op[:6] == 'PosNot': # possibly remove variable: two possible effects for the variable
+ n = 2
+ w = 3 # We want to keep 'Not' from 'PosNot' when adding action
+ else: # variable or ~variable
+ n = 1
+ w = 0
+ return n, w
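+ # Illustrative: AngelicHLA.compute_parameters(expr('PosYesNotAt(Home)')) returns (3, 9),
+ # and AngelicHLA.compute_parameters(expr('PosYesAt(SFO)')) returns (2, 6).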
+
+
+class AngelicNode(Node):
+ """
+ Extends the class Node.
+ self.action: contains the optimistic description of an angelic HLA
+ self.action_pes: contains the pessimistic description of an angelic HLA
+ """
- return PDLL(init, [hit, go], goal_test)
+ def __init__(self, state, parent=None, action_opt=None, action_pes=None, path_cost=0):
+ super().__init__(state, parent, action_opt, path_cost)
+ self.action_pes = action_pes
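+
+
+ # A minimal end-to-end sketch (illustrative; mirrors the accompanying notebook):
+ # describe the Go(Home, SFO) HLA angelically and search for a concrete plan.
+ #
+ #     problem, library = go_to_sfo()
+ #     opt = AngelicHLA('Go(Home, SFO)', precond='At(Home)', effect='$+At(SFO) & $-At(Home)')
+ #     pes = AngelicHLA('Go(Home, SFO)', precond='At(Home)', effect='$+At(SFO) & ~At(Home)')
+ #     initial_plan = [AngelicNode(problem.initial, None, [opt], [pes])]
+ #     plan = problem.angelic_search(library, initial_plan)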
diff --git a/planning_angelic_search.ipynb b/planning_angelic_search.ipynb
new file mode 100644
index 000000000..71408e1d9
--- /dev/null
+++ b/planning_angelic_search.ipynb
@@ -0,0 +1,638 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Angelic Search \n",
+ "\n",
+ "Search using angelic semantics (is a hierarchical search), where the agent chooses the implementation of the HLA's. \n",
+ "The algorithms input is: problem, hierarchy and initialPlan\n",
+ "- problem is of type Problem \n",
+ "- hierarchy is a dictionary consisting of all the actions. \n",
+ "- initialPlan is an approximate description(optimistic and pessimistic) of the agents choices for the implementation. \n",
+ " initialPlan contains a sequence of HLA's with angelic semantics"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from planning import * \n",
+ "from notebook import psource"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The Angelic search algorithm consists of three parts. \n",
+ "- Search using angelic semantics\n",
+ "- Decompose\n",
+ "- a search in the space of refinements, in a similar way with hierarchical search\n",
+ "\n",
+ "### Searching using angelic semantics\n",
+ "- Find the reachable set (optimistic and pessimistic) of the sequence of angelic HLA in initialPlan\n",
+ " - If the optimistic reachable set doesn't intersect the goal, then there is no solution\n",
+ " - If the pessimistic reachable set intersects the goal, then we call decompose, in order to find the sequence of actions that lead us to the goal. \n",
+ " - If the optimistic reachable set intersects the goal, but the pessimistic doesn't we do some further refinements, in order to see if there is a sequence of actions that achieves the goal. \n",
+ " \n",
+ "### Search in space of refinements\n",
+ "- Create a search tree, that has root the action and children it's refinements\n",
+ "- Extend frontier by adding each refinement, so that we keep looping till we find all primitive actions\n",
+ "- If we achieve that we return the path of the solution (search tree), else there is no solution and we return None.\n",
+ "\n",
+ " \n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ " Codestin Search App \n",
+ " \n",
+ " \n",
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ " def angelic_search ( problem , hierarchy , initialPlan ): \n",
+ " """ \n",
+ "\t[Figure 11.8] A hierarchical planning algorithm that uses angelic semantics to identify and \n",
+ "\tcommit to high-level plans that work while avoiding high-level plans that don’t. \n",
+ "\tThe predicate MAKING-PROGRESS checks to make sure that we aren’t stuck in an infinite regression \n",
+ "\tof refinements. \n",
+ "\tAt top level, call ANGELIC -SEARCH with [Act ] as the initialPlan . \n",
+ "\n",
+ " initialPlan contains a sequence of HLA's with angelic semantics \n",
+ "\n",
+ " The possible effects of an angelic HLA in initialPlan are : \n",
+ " ~ : effect remove \n",
+ " $+: effect possibly add \n",
+ " $-: effect possibly remove \n",
+ " $$: possibly add or remove \n",
+ "\t""" \n",
+ " frontier = deque ( initialPlan ) \n",
+ " while True : \n",
+ " if not frontier : \n",
+ " return None \n",
+ " plan = frontier . popleft () # sequence of HLA/Angelic HLA's \n",
+ " opt_reachable_set = Problem . reach_opt ( problem . init , plan ) \n",
+ " pes_reachable_set = Problem . reach_pes ( problem . init , plan ) \n",
+ " if problem . intersects_goal ( opt_reachable_set ): \n",
+ " if Problem . is_primitive ( plan , hierarchy ): \n",
+ " return ([ x for x in plan . action ]) \n",
+ " guaranteed = problem . intersects_goal ( pes_reachable_set ) \n",
+ " if guaranteed and Problem . making_progress ( plan , initialPlan ): \n",
+ " final_state = guaranteed [ 0 ] # any element of guaranteed \n",
+ " #print('decompose') \n",
+ " return Problem . decompose ( hierarchy , problem , plan , final_state , pes_reachable_set ) \n",
+ " ( hla , index ) = Problem . find_hla ( plan , hierarchy ) # there should be at least one HLA/Angelic_HLA, otherwise plan would be primitive. \n",
+ " prefix = plan . action [: index ] \n",
+ " suffix = plan . action [ index + 1 :] \n",
+ " outcome = Problem ( Problem . result ( problem . init , prefix ), problem . goals , problem . actions ) \n",
+ " for sequence in Problem . refinements ( hla , outcome , hierarchy ): # find refinements \n",
+ " frontier . append ( Angelic_Node ( outcome . init , plan , prefix + sequence + suffix , prefix + sequence + suffix )) \n",
+ " \n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "psource(Problem.angelic_search)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "\n",
+ "### Decompose \n",
+ "- Finds recursively the sequence of states and actions that lead us from initial state to goal.\n",
+ "- For each of the above actions we find their refinements,if they are not primitive, by calling the angelic_search function. \n",
+ " If there are not refinements return None\n",
+ " \n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ " Codestin Search App \n",
+ " \n",
+ " \n",
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ " def decompose ( hierarchy , s_0 , plan , s_f , reachable_set ): \n",
+ " solution = [] \n",
+ " i = max ( reachable_set . keys ()) \n",
+ " while plan . action_pes : \n",
+ " action = plan . action_pes . pop () \n",
+ " if ( i == 0 ): \n",
+ " return solution \n",
+ " s_i = Problem . find_previous_state ( s_f , reachable_set , i , action ) \n",
+ " problem = Problem ( s_i , s_f , plan . action ) \n",
+ " angelic_call = Problem . angelic_search ( problem , hierarchy , [ Angelic_Node ( s_i , Node ( None ), [ action ],[ action ])]) \n",
+ " if angelic_call : \n",
+ " for x in angelic_call : \n",
+ " solution . insert ( 0 , x ) \n",
+ " else : \n",
+ " return None \n",
+ " s_f = s_i \n",
+ " i -= 1 \n",
+ " return solution \n",
+ " \n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "psource(Problem.decompose)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Example\n",
+ "\n",
+ "Suppose that somebody wants to get to the airport. \n",
+ "The possible ways to do so is either get a taxi, or drive to the airport. \n",
+ "Those two actions have some preconditions and some effects. \n",
+ "If you get the taxi, you need to have cash, whereas if you drive you need to have a car. \n",
+ "Thus we define the following hierarchy of possible actions.\n",
+ "\n",
+ "##### hierarchy"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "library = {\n",
+ " 'HLA': ['Go(Home,SFO)', 'Go(Home,SFO)', 'Drive(Home, SFOLongTermParking)', 'Shuttle(SFOLongTermParking, SFO)', 'Taxi(Home, SFO)'],\n",
+ " 'steps': [['Drive(Home, SFOLongTermParking)', 'Shuttle(SFOLongTermParking, SFO)'], ['Taxi(Home, SFO)'], [], [], []],\n",
+ " 'precond': [['At(Home) & Have(Car)'], ['At(Home)'], ['At(Home) & Have(Car)'], ['At(SFOLongTermParking)'], ['At(Home)']],\n",
+ " 'effect': [['At(SFO) & ~At(Home)'], ['At(SFO) & ~At(Home) & ~Have(Cash)'], ['At(SFOLongTermParking) & ~At(Home)'], ['At(SFO) & ~At(LongTermParking)'], ['At(SFO) & ~At(Home) & ~Have(Cash)']] }\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "\n",
+ "the possible actions are the following:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "go_SFO = HLA('Go(Home,SFO)', precond='At(Home)', effect='At(SFO) & ~At(Home)')\n",
+ "taxi_SFO = HLA('Taxi(Home,SFO)', precond='At(Home)', effect='At(SFO) & ~At(Home) & ~Have(Cash)')\n",
+ "drive_SFOLongTermParking = HLA('Drive(Home, SFOLongTermParking)', 'At(Home) & Have(Car)','At(SFOLongTermParking) & ~At(Home)' )\n",
+ "shuttle_SFO = HLA('Shuttle(SFOLongTermParking, SFO)', 'At(SFOLongTermParking)', 'At(SFO) & ~At(LongTermParking)')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Suppose that (our preconditionds are that) we are Home and we have cash and car and our goal is to get to SFO and maintain our cash, and our possible actions are the above. \n",
+ "##### Then our problem is: "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "prob = Problem('At(Home) & Have(Cash) & Have(Car)', 'At(SFO) & Have(Cash)', [go_SFO, taxi_SFO, drive_SFOLongTermParking,shuttle_SFO])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "An agent gives us some approximate information about the plan we will follow: \n",
+ "(initialPlan is an Angelic Node, where: \n",
+ "- state is the initial state of the problem, \n",
+ "- parent is None \n",
+ "- action: is a list of actions (Angelic HLA's) with the optimistic estimators of effects and \n",
+ "- action_pes: is a list of actions (Angelic HLA's) with the pessimistic approximations of the effects\n",
+ "##### InitialPlan"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "angelic_opt_description = Angelic_HLA('Go(Home, SFO)', precond = 'At(Home)', effect ='$+At(SFO) & $-At(Home)' ) \n",
+ "angelic_pes_description = Angelic_HLA('Go(Home, SFO)', precond = 'At(Home)', effect ='$+At(SFO) & ~At(Home)' )\n",
+ "\n",
+ "initialPlan = [Angelic_Node(prob.init, None, [angelic_opt_description], [angelic_pes_description])] \n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We want to find the optimistic and pessimistic reachable set of initialPlan when applied to the problem:\n",
+ "##### Optimistic/Pessimistic reachable set"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[[At(Home), Have(Cash), Have(Car)], [Have(Cash), Have(Car), At(SFO), NotAt(Home)], [Have(Cash), Have(Car), NotAt(Home)], [At(Home), Have(Cash), Have(Car), At(SFO)], [At(Home), Have(Cash), Have(Car)]] \n",
+ "\n",
+ "[[At(Home), Have(Cash), Have(Car)], [Have(Cash), Have(Car), At(SFO), NotAt(Home)], [Have(Cash), Have(Car), NotAt(Home)]]\n"
+ ]
+ }
+ ],
+ "source": [
+ "opt_reachable_set = Problem.reach_opt(prob.init, initialPlan[0])\n",
+ "pes_reachable_set = Problem.reach_pes(prob.init, initialPlan[0])\n",
+ "print([x for y in opt_reachable_set.keys() for x in opt_reachable_set[y]], '\\n')\n",
+ "print([x for y in pes_reachable_set.keys() for x in pes_reachable_set[y]])\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "##### Refinements"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[HLA(Drive(Home, SFOLongTermParking)), HLA(Shuttle(SFOLongTermParking, SFO))]\n",
+ "[{'duration': 0, 'effect': [At(SFOLongTermParking), NotAt(Home)], 'args': (Home, SFOLongTermParking), 'uses': {}, 'consumes': {}, 'name': 'Drive', 'completed': False, 'precond': [At(Home), Have(Car)]}, {'duration': 0, 'effect': [At(SFO), NotAt(LongTermParking)], 'args': (SFOLongTermParking, SFO), 'uses': {}, 'consumes': {}, 'name': 'Shuttle', 'completed': False, 'precond': [At(SFOLongTermParking)]}] \n",
+ "\n",
+ "[HLA(Taxi(Home, SFO))]\n",
+ "[{'duration': 0, 'effect': [At(SFO), NotAt(Home), NotHave(Cash)], 'args': (Home, SFO), 'uses': {}, 'consumes': {}, 'name': 'Taxi', 'completed': False, 'precond': [At(Home)]}] \n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "for sequence in Problem.refinements(go_SFO, prob, library):\n",
+ " print (sequence)\n",
+ " print([x.__dict__ for x in sequence ], '\\n')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Run the angelic search\n",
+ "##### Top level call"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[HLA(Drive(Home, SFOLongTermParking)), HLA(Shuttle(SFOLongTermParking, SFO))] \n",
+ "\n",
+ "[{'duration': 0, 'effect': [At(SFOLongTermParking), NotAt(Home)], 'args': (Home, SFOLongTermParking), 'uses': {}, 'consumes': {}, 'name': 'Drive', 'completed': False, 'precond': [At(Home), Have(Car)]}, {'duration': 0, 'effect': [At(SFO), NotAt(LongTermParking)], 'args': (SFOLongTermParking, SFO), 'uses': {}, 'consumes': {}, 'name': 'Shuttle', 'completed': False, 'precond': [At(SFOLongTermParking)]}]\n"
+ ]
+ }
+ ],
+ "source": [
+ "plan= Problem.angelic_search(prob, library, initialPlan)\n",
+ "print (plan, '\\n')\n",
+ "print ([x.__dict__ for x in plan])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Example 2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "library_2 = {\n",
+ " 'HLA': ['Go(Home,SFO)', 'Go(Home,SFO)', 'Bus(Home, MetroStop)', 'Metro(MetroStop, SFO)' , 'Metro(MetroStop, SFO)', 'Metro1(MetroStop, SFO)', 'Metro2(MetroStop, SFO)' ,'Taxi(Home, SFO)'],\n",
+ " 'steps': [['Bus(Home, MetroStop)', 'Metro(MetroStop, SFO)'], ['Taxi(Home, SFO)'], [], ['Metro1(MetroStop, SFO)'], ['Metro2(MetroStop, SFO)'],[],[],[]],\n",
+ " 'precond': [['At(Home)'], ['At(Home)'], ['At(Home)'], ['At(MetroStop)'], ['At(MetroStop)'],['At(MetroStop)'], ['At(MetroStop)'] ,['At(Home) & Have(Cash)']],\n",
+ " 'effect': [['At(SFO) & ~At(Home)'], ['At(SFO) & ~At(Home) & ~Have(Cash)'], ['At(MetroStop) & ~At(Home)'], ['At(SFO) & ~At(MetroStop)'], ['At(SFO) & ~At(MetroStop)'], ['At(SFO) & ~At(MetroStop)'] , ['At(SFO) & ~At(MetroStop)'] ,['At(SFO) & ~At(Home) & ~Have(Cash)']] \n",
+ " }"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[HLA(Bus(Home, MetroStop)), HLA(Metro1(MetroStop, SFO))] \n",
+ "\n",
+ "[{'duration': 0, 'effect': [At(MetroStop), NotAt(Home)], 'args': (Home, MetroStop), 'uses': {}, 'consumes': {}, 'name': 'Bus', 'completed': False, 'precond': [At(Home)]}, {'duration': 0, 'effect': [At(SFO), NotAt(MetroStop)], 'args': (MetroStop, SFO), 'uses': {}, 'consumes': {}, 'name': 'Metro1', 'completed': False, 'precond': [At(MetroStop)]}]\n"
+ ]
+ }
+ ],
+ "source": [
+ "plan_2 = Problem.angelic_search(prob, library_2, initialPlan)\n",
+ "print(plan_2, '\\n')\n",
+ "print([x.__dict__ for x in plan_2])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Example 3 \n",
+ "\n",
+ "Sometimes there is no plan that achieves the goal!"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "library_3 = {\n",
+ " 'HLA': ['Shuttle(SFOLongTermParking, SFO)', 'Go(Home, SFOLongTermParking)', 'Taxi(Home, SFOLongTermParking)', 'Drive(Home, SFOLongTermParking)', 'Drive(SFOLongTermParking, Home)', 'Get(Cash)', 'Go(Home, ATM)'],\n",
+ " 'steps': [['Get(Cash)', 'Go(Home, SFOLongTermParking)'], ['Taxi(Home, SFOLongTermParking)'], [], [], [], ['Drive(SFOLongTermParking, Home)', 'Go(Home, ATM)'], []],\n",
+ " 'precond': [['At(SFOLongTermParking)'], ['At(Home)'], ['At(Home) & Have(Cash)'], ['At(Home)'], ['At(SFOLongTermParking)'], ['At(SFOLongTermParking)'], ['At(Home)']],\n",
+ " 'effect': [['At(SFO)'], ['At(SFO)'], ['At(SFOLongTermParking) & ~Have(Cash)'], ['At(SFOLongTermParking)'] ,['At(Home) & ~At(SFOLongTermParking)'], ['At(Home) & Have(Cash)'], ['Have(Cash)'] ]\n",
+ " }\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "shuttle_SFO = HLA('Shuttle(SFOLongTermParking, SFO)', 'Have(Cash) & At(SFOLongTermParking)', 'At(SFO)')\n",
+ "prob_3 = Problem('At(SFOLongTermParking) & Have(Cash)', 'At(SFO) & Have(Cash)', [shuttle_SFO])\n",
+ "# optimistic/pessimistic descriptions\n",
+ "angelic_opt_description = Angelic_HLA('Shuttle(SFOLongTermParking, SFO)', precond = 'At(SFOLongTermParking)', effect ='$+At(SFO) & $-At(SFOLongTermParking)' ) \n",
+ "angelic_pes_description = Angelic_HLA('Shuttle(SFOLongTermParking, SFO)', precond = 'At(SFOLongTermParking)', effect ='$+At(SFO) & ~At(SFOLongTermParking)' ) \n",
+ "# initial Plan\n",
+ "initialPlan_3 = [Angelic_Node(prob.init, None, [angelic_opt_description], [angelic_pes_description])] "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "None\n"
+ ]
+ }
+ ],
+ "source": [
+ "plan_3 = prob_3.angelic_search(library_3, initialPlan_3)\n",
+ "print(plan_3)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.5.3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}
diff --git a/planning_graphPlan.ipynb b/planning_graphPlan.ipynb
new file mode 100644
index 000000000..bffecb937
--- /dev/null
+++ b/planning_graphPlan.ipynb
@@ -0,0 +1,1066 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## SOLVING PLANNING PROBLEMS\n",
+ "----\n",
+ "### GRAPHPLAN\n",
+ " \n",
+ "The GraphPlan algorithm is a popular method of solving classical planning problems.\n",
+ "Before we get into the details of the algorithm, let's look at a special data structure called **planning graph**, used to give better heuristic estimates and plays a key role in the GraphPlan algorithm."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Planning Graph\n",
+ "A planning graph is a directed graph organized into levels. \n",
+ "Each level contains information about the current state of the knowledge base and the possible state-action links to and from that level.\n",
+ "The first level contains the initial state with nodes representing each fluent that holds in that level.\n",
+ "This level has state-action links linking each state to valid actions in that state.\n",
+ "Each action is linked to all its preconditions and its effect states.\n",
+ "Based on these effects, the next level is constructed.\n",
+ "The next level contains similarly structured information about the next state.\n",
+ "In this way, the graph is expanded using state-action links till we reach a state where all the required goals hold true simultaneously.\n",
+ "We can say that we have reached our goal if none of the goal states in the current level are mutually exclusive.\n",
+ "This will be explained in detail later.\n",
+ " \n",
+ "Planning graphs only work for propositional planning problems, hence we need to eliminate all variables by generating all possible substitutions.\n",
+ " \n",
+ "For example, the planning graph of the `have_cake_and_eat_cake_too` problem might look like this\n",
+ "\n",
+ " \n",
+ "The black lines indicate links between states and actions.\n",
+ " \n",
+ "In every planning problem, we are allowed to carry out the `no-op` action, ie, we can choose no action for a particular state.\n",
+ "These are called 'Persistence' actions and are represented in the graph by the small square boxes.\n",
+ "In technical terms, a persistence action has effects same as its preconditions.\n",
+ "This enables us to carry a state to the next level.\n",
+ " \n",
+ " \n",
+ "The gray lines indicate mutual exclusivity.\n",
+ "This means that the actions connected bya gray line cannot be taken together.\n",
+ "Mutual exclusivity (mutex) occurs in the following cases:\n",
+ "1. **Inconsistent effects**: One action negates the effect of the other. For example, _Eat(Cake)_ and the persistence of _Have(Cake)_ have inconsistent effects because they disagree on the effect _Have(Cake)_\n",
+ "2. **Interference**: One of the effects of an action is the negation of a precondition of the other. For example, _Eat(Cake)_ interferes with the persistence of _Have(Cake)_ by negating its precondition.\n",
+ "3. **Competing needs**: One of the preconditions of one action is mutually exclusive with a precondition of the other. For example, _Bake(Cake)_ and _Eat(Cake)_ are mutex because they compete on the value of the _Have(Cake)_ precondition."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "In the module, planning graphs have been implemented using two classes, `Level` which stores data for a particular level and `Graph` which connects multiple levels together.\n",
+ "Let's look at the `Level` class."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from planning import *\n",
+ "from notebook import psource"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ " Codestin Search App \n",
+ " \n",
+ " \n",
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "class Level : \n",
+ " """ \n",
+ " Contains the state of the planning problem \n",
+ " and exhaustive list of actions which use the \n",
+ " states as pre-condition. \n",
+ " """ \n",
+ "\n",
+ " def __init__ ( self , kb ): \n",
+ " """Initializes variables to hold state and action details of a level""" \n",
+ "\n",
+ " self . kb = kb \n",
+ " # current state \n",
+ " self . current_state = kb . clauses \n",
+ " # current action to state link \n",
+ " self . current_action_links = {} \n",
+ " # current state to action link \n",
+ " self . current_state_links = {} \n",
+ " # current action to next state link \n",
+ " self . next_action_links = {} \n",
+ " # next state to current action link \n",
+ " self . next_state_links = {} \n",
+ " # mutually exclusive actions \n",
+ " self . mutex = [] \n",
+ "\n",
+ " def __call__ ( self , actions , objects ): \n",
+ " self . build ( actions , objects ) \n",
+ " self . find_mutex () \n",
+ "\n",
+ " def separate ( self , e ): \n",
+ " """Separates an iterable of elements into positive and negative parts""" \n",
+ "\n",
+ " positive = [] \n",
+ " negative = [] \n",
+ " for clause in e : \n",
+ " if clause . op [: 3 ] == 'Not' : \n",
+ " negative . append ( clause ) \n",
+ " else : \n",
+ " positive . append ( clause ) \n",
+ " return positive , negative \n",
+ "\n",
+ " def find_mutex ( self ): \n",
+ " """Finds mutually exclusive actions""" \n",
+ "\n",
+ " # Inconsistent effects \n",
+ " pos_nsl , neg_nsl = self . separate ( self . next_state_links ) \n",
+ "\n",
+ " for negeff in neg_nsl : \n",
+ " new_negeff = Expr ( negeff . op [ 3 :], * negeff . args ) \n",
+ " for poseff in pos_nsl : \n",
+ " if new_negeff == poseff : \n",
+ " for a in self . next_state_links [ poseff ]: \n",
+ " for b in self . next_state_links [ negeff ]: \n",
+ " if { a , b } not in self . mutex : \n",
+ " self . mutex . append ({ a , b }) \n",
+ "\n",
+ " # Interference will be calculated with the last step \n",
+ " pos_csl , neg_csl = self . separate ( self . current_state_links ) \n",
+ "\n",
+ " # Competing needs \n",
+ " for posprecond in pos_csl : \n",
+ " for negprecond in neg_csl : \n",
+ " new_negprecond = Expr ( negprecond . op [ 3 :], * negprecond . args ) \n",
+ " if new_negprecond == posprecond : \n",
+ " for a in self . current_state_links [ posprecond ]: \n",
+ " for b in self . current_state_links [ negprecond ]: \n",
+ " if { a , b } not in self . mutex : \n",
+ " self . mutex . append ({ a , b }) \n",
+ "\n",
+ " # Inconsistent support \n",
+ " state_mutex = [] \n",
+ " for pair in self . mutex : \n",
+ " next_state_0 = self . next_action_links [ list ( pair )[ 0 ]] \n",
+ " if len ( pair ) == 2 : \n",
+ " next_state_1 = self . next_action_links [ list ( pair )[ 1 ]] \n",
+ " else : \n",
+ " next_state_1 = self . next_action_links [ list ( pair )[ 0 ]] \n",
+ " if ( len ( next_state_0 ) == 1 ) and ( len ( next_state_1 ) == 1 ): \n",
+ " state_mutex . append ({ next_state_0 [ 0 ], next_state_1 [ 0 ]}) \n",
+ " \n",
+ " self . mutex = self . mutex + state_mutex \n",
+ "\n",
+ " def build ( self , actions , objects ): \n",
+ " """Populates the lists and dictionaries containing the state action dependencies""" \n",
+ "\n",
+ " for clause in self . current_state : \n",
+ " p_expr = Expr ( 'P' + clause . op , * clause . args ) \n",
+ " self . current_action_links [ p_expr ] = [ clause ] \n",
+ " self . next_action_links [ p_expr ] = [ clause ] \n",
+ " self . current_state_links [ clause ] = [ p_expr ] \n",
+ " self . next_state_links [ clause ] = [ p_expr ] \n",
+ "\n",
+ " for a in actions : \n",
+ " num_args = len ( a . args ) \n",
+ " possible_args = tuple ( itertools . permutations ( objects , num_args )) \n",
+ "\n",
+ " for arg in possible_args : \n",
+ " if a . check_precond ( self . kb , arg ): \n",
+ " for num , symbol in enumerate ( a . args ): \n",
+ " if not symbol . op . islower (): \n",
+ " arg = list ( arg ) \n",
+ " arg [ num ] = symbol \n",
+ " arg = tuple ( arg ) \n",
+ "\n",
+ " new_action = a . substitute ( Expr ( a . name , * a . args ), arg ) \n",
+ " self . current_action_links [ new_action ] = [] \n",
+ "\n",
+ " for clause in a . precond : \n",
+ " new_clause = a . substitute ( clause , arg ) \n",
+ " self . current_action_links [ new_action ] . append ( new_clause ) \n",
+ " if new_clause in self . current_state_links : \n",
+ " self . current_state_links [ new_clause ] . append ( new_action ) \n",
+ " else : \n",
+ " self . current_state_links [ new_clause ] = [ new_action ] \n",
+ " \n",
+ " self . next_action_links [ new_action ] = [] \n",
+ " for clause in a . effect : \n",
+ " new_clause = a . substitute ( clause , arg ) \n",
+ "\n",
+ " self . next_action_links [ new_action ] . append ( new_clause ) \n",
+ " if new_clause in self . next_state_links : \n",
+ " self . next_state_links [ new_clause ] . append ( new_action ) \n",
+ " else : \n",
+ " self . next_state_links [ new_clause ] = [ new_action ] \n",
+ "\n",
+ " def perform_actions ( self ): \n",
+ " """Performs the necessary actions and returns a new Level""" \n",
+ "\n",
+ " new_kb = FolKB ( list ( set ( self . next_state_links . keys ()))) \n",
+ " return Level ( new_kb ) \n",
+ " \n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "psource(Level)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Each level stores the following data\n",
+ "1. The current state of the level in `current_state`\n",
+ "2. Links from an action to its preconditions in `current_action_links`\n",
+ "3. Links from a state to the possible actions in that state in `current_state_links`\n",
+ "4. Links from each action to its effects in `next_action_links`\n",
+ "5. Links from each possible next state from each action in `next_state_links`. This stores the same information as the `current_action_links` of the next level.\n",
+ "6. Mutex links in `mutex`.\n",
+ " \n",
+ " \n",
+ "The `find_mutex` method finds the mutex links according to the points given above.\n",
+ " \n",
+ "The `build` method populates the data structures storing the state and action information.\n",
+ "Persistence actions for each clause in the current state are also defined here. \n",
+ "The newly created persistence action has the same name as its state, prefixed with a 'P'."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's now look at the `Graph` class."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ " Codestin Search App \n",
+ " \n",
+ " \n",
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "class Graph : \n",
+ " """ \n",
+ " Contains levels of state and actions \n",
+ " Used in graph planning algorithm to extract a solution \n",
+ " """ \n",
+ "\n",
+ " def __init__ ( self , planningproblem ): \n",
+ " self . planningproblem = planningproblem \n",
+ " self . kb = FolKB ( planningproblem . init ) \n",
+ " self . levels = [ Level ( self . kb )] \n",
+ " self . objects = set ( arg for clause in self . kb . clauses for arg in clause . args ) \n",
+ "\n",
+ " def __call__ ( self ): \n",
+ " self . expand_graph () \n",
+ "\n",
+ " def expand_graph ( self ): \n",
+ " """Expands the graph by a level""" \n",
+ "\n",
+ " last_level = self . levels [ - 1 ] \n",
+ " last_level ( self . planningproblem . actions , self . objects ) \n",
+ " self . levels . append ( last_level . perform_actions ()) \n",
+ "\n",
+ " def non_mutex_goals ( self , goals , index ): \n",
+ " """Checks whether the goals are mutually exclusive""" \n",
+ "\n",
+ " goal_perm = itertools . combinations ( goals , 2 ) \n",
+ " for g in goal_perm : \n",
+ " if set ( g ) in self . levels [ index ] . mutex : \n",
+ " return False \n",
+ " return True \n",
+ " \n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "psource(Graph)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The class stores a problem definition in `pddl`, \n",
+ "a knowledge base in `kb`, \n",
+ "a list of `Level` objects in `levels` and \n",
+ "all the possible arguments found in the initial state of the problem in `objects`.\n",
+ " \n",
+ "The `expand_graph` method generates a new level of the graph.\n",
+ "This method is invoked when the goal conditions haven't been met in the current level or the actions that lead to it are mutually exclusive.\n",
+ "The `non_mutex_goals` method checks whether the goals in the current state are mutually exclusive.\n",
+ " \n",
+ " \n",
+ "Using these two classes, we can define a planning graph which can either be used to provide reliable heuristics for planning problems or used in the `GraphPlan` algorithm.\n",
+ " \n",
+ "Let's have a look at the `GraphPlan` class."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ " Codestin Search App \n",
+ " \n",
+ " \n",
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "class GraphPlan : \n",
+ " """ \n",
+ " Class for formulation GraphPlan algorithm \n",
+ " Constructs a graph of state and action space \n",
+ " Returns solution for the planning problem \n",
+ " """ \n",
+ "\n",
+ " def __init__ ( self , planningproblem ): \n",
+ " self . graph = Graph ( planningproblem ) \n",
+ " self . nogoods = [] \n",
+ " self . solution = [] \n",
+ "\n",
+ " def check_leveloff ( self ): \n",
+ " """Checks if the graph has levelled off""" \n",
+ "\n",
+ " check = ( set ( self . graph . levels [ - 1 ] . current_state ) == set ( self . graph . levels [ - 2 ] . current_state )) \n",
+ "\n",
+ " if check : \n",
+ " return True \n",
+ "\n",
+ " def extract_solution ( self , goals , index ): \n",
+ " """Extracts the solution""" \n",
+ "\n",
+ " level = self . graph . levels [ index ] \n",
+ " if not self . graph . non_mutex_goals ( goals , index ): \n",
+ " self . nogoods . append (( level , goals )) \n",
+ " return \n",
+ "\n",
+ " level = self . graph . levels [ index - 1 ] \n",
+ "\n",
+ " # Create all combinations of actions that satisfy the goal \n",
+ " actions = [] \n",
+ " for goal in goals : \n",
+ " actions . append ( level . next_state_links [ goal ]) \n",
+ "\n",
+ " all_actions = list ( itertools . product ( * actions )) \n",
+ "\n",
+ " # Filter out non-mutex actions \n",
+ " non_mutex_actions = [] \n",
+ " for action_tuple in all_actions : \n",
+ " action_pairs = itertools . combinations ( list ( set ( action_tuple )), 2 ) \n",
+ " non_mutex_actions . append ( list ( set ( action_tuple ))) \n",
+ " for pair in action_pairs : \n",
+ " if set ( pair ) in level . mutex : \n",
+ " non_mutex_actions . pop ( - 1 ) \n",
+ " break \n",
+ " \n",
+ "\n",
+ " # Recursion \n",
+ " for action_list in non_mutex_actions : \n",
+ " if [ action_list , index ] not in self . solution : \n",
+ " self . solution . append ([ action_list , index ]) \n",
+ "\n",
+ " new_goals = [] \n",
+ " for act in set ( action_list ): \n",
+ " if act in level . current_action_links : \n",
+ " new_goals = new_goals + level . current_action_links [ act ] \n",
+ "\n",
+ " if abs ( index ) + 1 == len ( self . graph . levels ): \n",
+ " return \n",
+ " elif ( level , new_goals ) in self . nogoods : \n",
+ " return \n",
+ " else : \n",
+ " self . extract_solution ( new_goals , index - 1 ) \n",
+ "\n",
+ " # Level-Order multiple solutions \n",
+ " solution = [] \n",
+ " for item in self . solution : \n",
+ " if item [ 1 ] == - 1 : \n",
+ " solution . append ([]) \n",
+ " solution [ - 1 ] . append ( item [ 0 ]) \n",
+ " else : \n",
+ " solution [ - 1 ] . append ( item [ 0 ]) \n",
+ "\n",
+ " for num , item in enumerate ( solution ): \n",
+ " item . reverse () \n",
+ " solution [ num ] = item \n",
+ "\n",
+ " return solution \n",
+ "\n",
+ " def goal_test ( self , kb ): \n",
+ " return all ( kb . ask ( q ) is not False for q in self . graph . planningproblem . goals ) \n",
+ "\n",
+ " def execute ( self ): \n",
+ " """Executes the GraphPlan algorithm for the given problem""" \n",
+ "\n",
+ " while True : \n",
+ " self . graph . expand_graph () \n",
+ " if ( self . goal_test ( self . graph . levels [ - 1 ] . kb ) and self . graph . non_mutex_goals ( self . graph . planningproblem . goals , - 1 )): \n",
+ " solution = self . extract_solution ( self . graph . planningproblem . goals , - 1 ) \n",
+ " if solution : \n",
+ " return solution \n",
+ " \n",
+ " if len ( self . graph . levels ) >= 2 and self . check_leveloff (): \n",
+ " return None \n",
+ " \n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "psource(GraphPlan)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+    "Given a planning problem defined as a PlanningProblem, `GraphPlan` creates a planning graph stored in `graph` and expands it until it reaches a level where all the required goals are present simultaneously and are pairwise non-mutex.\n",
+ " \n",
+ "Once a goal is found, `extract_solution` is called.\n",
+ "This method recursively finds the path to a solution given a planning graph.\n",
+    "In the case where `extract_solution` fails to find a solution for a set of goals at a given level, we record the `(level, goals)` pair as a **no-good**.\n",
+ "Whenever `extract_solution` is called again with the same level and goals, we can find the recorded no-good and immediately return failure rather than searching again. \n",
+ "No-goods are also used in the termination test.\n",
+ " \n",
+    "The `check_leveloff` method checks if the planning graph for the problem has **levelled off**, i.e., it has the same states, actions and mutex pairs as the previous level.\n",
+ "If the graph has already levelled off and we haven't found a solution, there is no point expanding the graph, as it won't lead to anything new.\n",
+ "In such a case, we can declare that the planning problem is unsolvable with the given constraints.\n",
+ " \n",
+ " \n",
+    "To summarize, the `GraphPlan` algorithm repeatedly calls `expand_graph` and tests whether the latest level contains all the goals and whether they are non-mutex.\n",
+ " \n",
+ "If so, `extract_solution` is invoked which recursively reconstructs the solution from the planning graph.\n",
+ " \n",
+ "If not, then we check if our graph has levelled off and continue if it hasn't."
+ ]
+ },
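+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "As a small illustration of the levelling-off test described above (a rough sketch, not the library's code), assume each level of a planning graph exposes a `current_state` collection of literals; the graph has levelled off when the newest level adds nothing new:\n",
+    "\n",
+    "```python\n",
+    "def levelled_off(levels):\n",
+    "    # True when the two most recent levels contain exactly the same state literals\n",
+    "    return len(levels) >= 2 and set(levels[-1].current_state) == set(levels[-2].current_state)\n",
+    "```"
+   ]
+  },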
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's solve a few planning problems that we had defined earlier."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Air cargo problem\n",
+ "In accordance with the summary above, we have defined a helper function to carry out `GraphPlan` on the `air_cargo` problem.\n",
+ "The function is pretty straightforward.\n",
+ "Let's have a look."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "def air_cargo_graphplan (): \n",
+ " """Solves the air cargo problem using GraphPlan""" \n",
+ " return GraphPlan ( air_cargo ()) . execute () \n",
+ " \n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "psource(air_cargo_graphplan)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's instantiate the problem and find a solution using this helper function."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[[[Load(C2, P2, JFK),\n",
+ " PAirport(SFO),\n",
+ " PAirport(JFK),\n",
+ " PPlane(P2),\n",
+ " PPlane(P1),\n",
+ " Fly(P2, JFK, SFO),\n",
+ " PCargo(C2),\n",
+ " Load(C1, P1, SFO),\n",
+ " Fly(P1, SFO, JFK),\n",
+ " PCargo(C1)],\n",
+ " [Unload(C2, P2, SFO), Unload(C1, P1, JFK)]]]"
+ ]
+ },
+ "execution_count": 19,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "airCargoG = air_cargo_graphplan()\n",
+ "airCargoG"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Each element in the solution is a valid action.\n",
+ "The solution is separated into lists for each level.\n",
+ "The actions prefixed with a 'P' are persistence actions and can be ignored.\n",
+ "They simply carry certain states forward.\n",
+ "We have another helper function `linearize` that presents the solution in a more readable format, much like a total-order planner, but it is _not_ a total-order planner."
+ ]
+ },
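+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "As a rough sketch of what such a flattening involves (this is not the library's `linearize`, and it assumes each action in the solution is an `Expr` whose operator name is available as `.op`), we could drop the persistence actions and concatenate the remaining levels; unlike `linearize`, this sketch does not verify that the within-level order is actually executable:\n",
+    "\n",
+    "```python\n",
+    "def flatten_solution(solution):\n",
+    "    # solution[0] is a list of levels; keep only the non-persistence actions\n",
+    "    flat = []\n",
+    "    for level in solution[0]:\n",
+    "        for action in level:\n",
+    "            name = str(action.op)\n",
+    "            # persistence actions are named 'P' followed by the literal they carry forward\n",
+    "            if not (name.startswith('P') and name[1:2].isupper()):\n",
+    "                flat.append(action)\n",
+    "    return flat\n",
+    "\n",
+    "flatten_solution(airCargoG)\n",
+    "```"
+   ]
+  },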
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[Load(C2, P2, JFK),\n",
+ " Fly(P2, JFK, SFO),\n",
+ " Load(C1, P1, SFO),\n",
+ " Fly(P1, SFO, JFK),\n",
+ " Unload(C2, P2, SFO),\n",
+ " Unload(C1, P1, JFK)]"
+ ]
+ },
+ "execution_count": 20,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "linearize(airCargoG)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Indeed, this is a correct solution.\n",
+ " \n",
+ "There are similar helper functions for some other planning problems.\n",
+ " \n",
+    "Let's try solving the spare tire problem."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[Remove(Spare, Trunk), Remove(Flat, Axle), PutOn(Spare, Axle)]"
+ ]
+ },
+ "execution_count": 21,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "spareTireG = spare_tire_graphplan()\n",
+ "linearize(spareTireG)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Solution for the cake problem"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[Eat(Cake), Bake(Cake)]"
+ ]
+ },
+ "execution_count": 22,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "cakeProblemG = have_cake_and_eat_cake_too_graphplan()\n",
+ "linearize(cakeProblemG)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+    "Solution for the Sussman Anomaly configuration of three blocks."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[MoveToTable(C, A), Move(B, Table, C), Move(A, Table, B)]"
+ ]
+ },
+ "execution_count": 23,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "sussmanAnomalyG = three_block_tower_graphplan()\n",
+ "linearize(sussmanAnomalyG)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+    "Solution for the socks and shoes problem"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[RightSock, LeftSock, RightShoe, LeftShoe]"
+ ]
+ },
+ "execution_count": 24,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "socksShoesG = socks_and_shoes_graphplan()\n",
+ "linearize(socksShoesG)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.5.3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}
diff --git a/planning_hierarchical_search.ipynb b/planning_hierarchical_search.ipynb
new file mode 100644
index 000000000..18e57b23b
--- /dev/null
+++ b/planning_hierarchical_search.ipynb
@@ -0,0 +1,546 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Hierarchical Search \n",
+ "\n",
+    "Hierarchical search is a planning algorithm that works at a high level of abstraction. \n",
+    "Instead of the primitive actions of classical planning (chapter 10), we now use high-level actions (HLAs) (see planning.ipynb). \n",
+ "\n",
+ "## Refinements\n",
+ "\n",
+ "Each __HLA__ has one or more refinements into a sequence of actions, each of which may be an HLA or a primitive action (which has no refinements by definition). \n",
+ "For example:\n",
+    "- (a) the high level action \"Go to San Francisco airport\" (Go(Home, SFO)) might have two possible refinements, \"Drive to San Francisco airport\" and \"Taxi to San Francisco airport\". \n",
+ " \n",
+ "- (b) A recursive refinement for navigation in the vacuum world would be: to get to a\n",
+ "destination, take a step, and then go to the destination.\n",
+ " \n",
+ "\n",
+ " \n",
+ "- __implementation__: An HLA refinement that contains only primitive actions is called an implementation of the HLA\n",
+ "- An implementation of a high-level plan (a sequence of HLAs) is the concatenation of implementations of each HLA in the sequence\n",
+ "- A high-level plan __achieves the goal__ from a given state if at least one of its implementations achieves the goal from that state\n",
+ " \n",
+ "\n",
+    "The inputs of the refinements function are: \n",
+    "- __hla__: the HLA whose refinements we want to compute \n",
+    "- __state__: the knowledge base of the current problem (Problem.init) \n",
+ "- __library__: the hierarchy of the actions in the planning problem\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from planning import * \n",
+ "from notebook import psource"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ " def refinements ( hla , state , library ): # refinements may be (multiple) HLA themselves ... \n",
+ " """ \n",
+ " state is a Problem, containing the current state kb \n",
+ " library is a dictionary containing details for every possible refinement. eg: \n",
+ " { \n",
+ " 'HLA': [ \n",
+ " 'Go(Home, SFO)', \n",
+ " 'Go(Home, SFO)', \n",
+ " 'Drive(Home, SFOLongTermParking)', \n",
+ " 'Shuttle(SFOLongTermParking, SFO)', \n",
+ " 'Taxi(Home, SFO)' \n",
+ " ], \n",
+ " 'steps': [ \n",
+ " ['Drive(Home, SFOLongTermParking)', 'Shuttle(SFOLongTermParking, SFO)'], \n",
+ " ['Taxi(Home, SFO)'], \n",
+ " [], \n",
+ " [], \n",
+ " [] \n",
+ " ], \n",
+ " # empty refinements indicate a primitive action \n",
+ " 'precond': [ \n",
+ " ['At(Home) & Have(Car)'], \n",
+ " ['At(Home)'], \n",
+ " ['At(Home) & Have(Car)'], \n",
+ " ['At(SFOLongTermParking)'], \n",
+ " ['At(Home)'] \n",
+ " ], \n",
+ " 'effect': [ \n",
+ " ['At(SFO) & ~At(Home)'], \n",
+ " ['At(SFO) & ~At(Home)'], \n",
+ " ['At(SFOLongTermParking) & ~At(Home)'], \n",
+ " ['At(SFO) & ~At(SFOLongTermParking)'], \n",
+ " ['At(SFO) & ~At(Home)'] \n",
+ " ] \n",
+ " } \n",
+ " """ \n",
+ " e = Expr ( hla . name , hla . args ) \n",
+ " indices = [ i for i , x in enumerate ( library [ 'HLA' ]) if expr ( x ) . op == hla . name ] \n",
+ " for i in indices : \n",
+ " actions = [] \n",
+ " for j in range ( len ( library [ 'steps' ][ i ])): \n",
+ " # find the index of the step [j] of the HLA \n",
+ " index_step = [ k for k , x in enumerate ( library [ 'HLA' ]) if x == library [ 'steps' ][ i ][ j ]][ 0 ] \n",
+ " precond = library [ 'precond' ][ index_step ][ 0 ] # preconditions of step [j] \n",
+ " effect = library [ 'effect' ][ index_step ][ 0 ] # effect of step [j] \n",
+ " actions . append ( HLA ( library [ 'steps' ][ i ][ j ], precond , effect )) \n",
+ " yield actions \n",
+ " \n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "psource(Problem.refinements)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Hierarchical search \n",
+ "\n",
+    "Hierarchical search is a breadth-first implementation of hierarchical forward planning search in the space of refinements (i.e. it repeatedly chooses an HLA in the current plan and replaces it with one of its refinements, until the plan achieves the goal). \n",
+ "\n",
+ " \n",
+    "The algorithm's inputs are a problem and a hierarchy:\n",
+    "- __problem__: an instance of the Problem class \n",
+    "- __hierarchy__: a dictionary consisting of all the actions and the order in which they are performed. \n",
+ " \n",
+ "\n",
+    "In the top-level call, initialPlan contains [act] (i.e. the action to be performed)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ " def hierarchical_search ( problem , hierarchy ): \n",
+ " """ \n",
+ " [Figure 11.5] 'Hierarchical Search, a Breadth First Search implementation of Hierarchical \n",
+ " Forward Planning Search' \n",
+ " The problem is a real-world problem defined by the problem class, and the hierarchy is \n",
+ " a dictionary of HLA - refinements (see refinements generator for details) \n",
+ " """ \n",
+ " act = Node ( problem . init , None , [ problem . actions [ 0 ]]) \n",
+ " frontier = deque () \n",
+ " frontier . append ( act ) \n",
+ " while True : \n",
+ " if not frontier : \n",
+ " return None \n",
+ " plan = frontier . popleft () \n",
+ " ( hla , index ) = Problem . find_hla ( plan , hierarchy ) # finds the first non primitive hla in plan actions \n",
+ " prefix = plan . action [: index ] \n",
+ " outcome = Problem ( Problem . result ( problem . init , prefix ), problem . goals , problem . actions ) \n",
+ " suffix = plan . action [ index + 1 :] \n",
+ " if not hla : # hla is None and plan is primitive \n",
+ " if outcome . goal_test (): \n",
+ " return plan . action \n",
+ " else : \n",
+ " for sequence in Problem . refinements ( hla , outcome , hierarchy ): # find refinements \n",
+ " frontier . append ( Node ( outcome . init , plan , prefix + sequence + suffix )) \n",
+ " \n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "psource(Problem.hierarchical_search)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Example\n",
+ "\n",
+ "Suppose that somebody wants to get to the airport. \n",
+    "The possible ways to do so are either to take a taxi or to drive to the airport. \n",
+    "Those two actions have some preconditions and some effects. \n",
+    "If you take the taxi, you need to have cash, whereas if you drive you need to have a car. \n",
+ "Thus we define the following hierarchy of possible actions.\n",
+ "\n",
+ "##### hierarchy"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "library = {\n",
+ " 'HLA': ['Go(Home,SFO)', 'Go(Home,SFO)', 'Drive(Home, SFOLongTermParking)', 'Shuttle(SFOLongTermParking, SFO)', 'Taxi(Home, SFO)'],\n",
+ " 'steps': [['Drive(Home, SFOLongTermParking)', 'Shuttle(SFOLongTermParking, SFO)'], ['Taxi(Home, SFO)'], [], [], []],\n",
+ " 'precond': [['At(Home) & Have(Car)'], ['At(Home)'], ['At(Home) & Have(Car)'], ['At(SFOLongTermParking)'], ['At(Home)']],\n",
+ " 'effect': [['At(SFO) & ~At(Home)'], ['At(SFO) & ~At(Home) & ~Have(Cash)'], ['At(SFOLongTermParking) & ~At(Home)'], ['At(SFO) & ~At(LongTermParking)'], ['At(SFO) & ~At(Home) & ~Have(Cash)']] }\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "\n",
+ "the possible actions are the following:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "go_SFO = HLA('Go(Home,SFO)', precond='At(Home)', effect='At(SFO) & ~At(Home)')\n",
+ "taxi_SFO = HLA('Taxi(Home,SFO)', precond='At(Home)', effect='At(SFO) & ~At(Home) & ~Have(Cash)')\n",
+ "drive_SFOLongTermParking = HLA('Drive(Home, SFOLongTermParking)', 'At(Home) & Have(Car)','At(SFOLongTermParking) & ~At(Home)' )\n",
+ "shuttle_SFO = HLA('Shuttle(SFOLongTermParking, SFO)', 'At(SFOLongTermParking)', 'At(SFO) & ~At(LongTermParking)')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+    "Suppose that (as our preconditions) we are at Home, we have cash and a car, and our goal is to get to SFO while keeping our cash; our possible actions are the ones above. \n",
+ "##### Then our problem is: "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "prob = Problem('At(Home) & Have(Cash) & Have(Car)', 'At(SFO) & Have(Cash)', [go_SFO])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "##### Refinements\n",
+ "\n",
+    "The refinements of the action Go(Home, SFO) are defined as: \n",
+ "['Drive(Home,SFOLongTermParking)', 'Shuttle(SFOLongTermParking, SFO)'], ['Taxi(Home, SFO)']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[HLA(Drive(Home, SFOLongTermParking)), HLA(Shuttle(SFOLongTermParking, SFO))]\n",
+ "[{'completed': False, 'args': (Home, SFOLongTermParking), 'name': 'Drive', 'uses': {}, 'duration': 0, 'effect': [At(SFOLongTermParking), NotAt(Home)], 'consumes': {}, 'precond': [At(Home), Have(Car)]}, {'completed': False, 'args': (SFOLongTermParking, SFO), 'name': 'Shuttle', 'uses': {}, 'duration': 0, 'effect': [At(SFO), NotAt(LongTermParking)], 'consumes': {}, 'precond': [At(SFOLongTermParking)]}] \n",
+ "\n",
+ "[HLA(Taxi(Home, SFO))]\n",
+ "[{'completed': False, 'args': (Home, SFO), 'name': 'Taxi', 'uses': {}, 'duration': 0, 'effect': [At(SFO), NotAt(Home), NotHave(Cash)], 'consumes': {}, 'precond': [At(Home)]}] \n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "for sequence in Problem.refinements(go_SFO, prob, library):\n",
+ " print (sequence)\n",
+ " print([x.__dict__ for x in sequence ], '\\n')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Run the hierarchical search\n",
+ "##### Top level call"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[HLA(Drive(Home, SFOLongTermParking)), HLA(Shuttle(SFOLongTermParking, SFO))] \n",
+ "\n",
+ "[{'completed': False, 'args': (Home, SFOLongTermParking), 'name': 'Drive', 'uses': {}, 'duration': 0, 'effect': [At(SFOLongTermParking), NotAt(Home)], 'consumes': {}, 'precond': [At(Home), Have(Car)]}, {'completed': False, 'args': (SFOLongTermParking, SFO), 'name': 'Shuttle', 'uses': {}, 'duration': 0, 'effect': [At(SFO), NotAt(LongTermParking)], 'consumes': {}, 'precond': [At(SFOLongTermParking)]}]\n"
+ ]
+ }
+ ],
+ "source": [
+ "plan= Problem.hierarchical_search(prob, library)\n",
+ "print (plan, '\\n')\n",
+ "print ([x.__dict__ for x in plan])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Example 2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "library_2 = {\n",
+ " 'HLA': ['Go(Home,SFO)', 'Go(Home,SFO)', 'Bus(Home, MetroStop)', 'Metro(MetroStop, SFO)' , 'Metro(MetroStop, SFO)', 'Metro1(MetroStop, SFO)', 'Metro2(MetroStop, SFO)' ,'Taxi(Home, SFO)'],\n",
+ " 'steps': [['Bus(Home, MetroStop)', 'Metro(MetroStop, SFO)'], ['Taxi(Home, SFO)'], [], ['Metro1(MetroStop, SFO)'], ['Metro2(MetroStop, SFO)'],[],[],[]],\n",
+ " 'precond': [['At(Home)'], ['At(Home)'], ['At(Home)'], ['At(MetroStop)'], ['At(MetroStop)'],['At(MetroStop)'], ['At(MetroStop)'] ,['At(Home) & Have(Cash)']],\n",
+ " 'effect': [['At(SFO) & ~At(Home)'], ['At(SFO) & ~At(Home) & ~Have(Cash)'], ['At(MetroStop) & ~At(Home)'], ['At(SFO) & ~At(MetroStop)'], ['At(SFO) & ~At(MetroStop)'], ['At(SFO) & ~At(MetroStop)'] , ['At(SFO) & ~At(MetroStop)'] ,['At(SFO) & ~At(Home) & ~Have(Cash)']] \n",
+ " }"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[HLA(Bus(Home, MetroStop)), HLA(Metro1(MetroStop, SFO))] \n",
+ "\n",
+ "[{'completed': False, 'args': (Home, MetroStop), 'name': 'Bus', 'uses': {}, 'duration': 0, 'effect': [At(MetroStop), NotAt(Home)], 'consumes': {}, 'precond': [At(Home)]}, {'completed': False, 'args': (MetroStop, SFO), 'name': 'Metro1', 'uses': {}, 'duration': 0, 'effect': [At(SFO), NotAt(MetroStop)], 'consumes': {}, 'precond': [At(MetroStop)]}]\n"
+ ]
+ }
+ ],
+ "source": [
+ "plan_2 = Problem.hierarchical_search(prob, library_2)\n",
+ "print(plan_2, '\\n')\n",
+ "print([x.__dict__ for x in plan_2])"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.5.3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}
diff --git a/planning_partial_order_planner.ipynb b/planning_partial_order_planner.ipynb
new file mode 100644
index 000000000..4b1a98bb3
--- /dev/null
+++ b/planning_partial_order_planner.ipynb
@@ -0,0 +1,850 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### PARTIAL ORDER PLANNER\n",
+ "A partial-order planning algorithm is significantly different from a total-order planner.\n",
+    "The way a partial-order planner works enables it to take advantage of _problem decomposition_ and work on each subproblem separately.\n",
+    "It works on several subgoals independently, solves them with several subplans, and then combines the subplans.\n",
+ " \n",
+ "A partial-order planner also follows the **least commitment** strategy, where it delays making choices for as long as possible.\n",
+ "Variables are not bound unless it is absolutely necessary and new actions are chosen only if the existing actions cannot fulfil the required precondition.\n",
+ " \n",
+ "Any planning algorithm that can place two actions into a plan without specifying which comes first is called a **partial-order planner**.\n",
+ "A partial-order planner searches through the space of plans rather than the space of states, which makes it perform better for certain problems.\n",
+ " \n",
+ " \n",
+ "Let's have a look at the `PartialOrderPlanner` class."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from planning import *\n",
+ "from notebook import psource"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "class PartialOrderPlanner : \n",
+ "\n",
+ " def __init__ ( self , planningproblem ): \n",
+ " self . planningproblem = planningproblem \n",
+ " self . initialize () \n",
+ "\n",
+ " def initialize ( self ): \n",
+ " """Initialize all variables""" \n",
+ " self . causal_links = [] \n",
+ " self . start = Action ( 'Start' , [], self . planningproblem . init ) \n",
+ " self . finish = Action ( 'Finish' , self . planningproblem . goals , []) \n",
+ " self . actions = set () \n",
+ " self . actions . add ( self . start ) \n",
+ " self . actions . add ( self . finish ) \n",
+ " self . constraints = set () \n",
+ " self . constraints . add (( self . start , self . finish )) \n",
+ " self . agenda = set () \n",
+ " for precond in self . finish . precond : \n",
+ " self . agenda . add (( precond , self . finish )) \n",
+ " self . expanded_actions = self . expand_actions () \n",
+ "\n",
+ " def expand_actions ( self , name = None ): \n",
+ " """Generate all possible actions with variable bindings for precondition selection heuristic""" \n",
+ "\n",
+ " objects = set ( arg for clause in self . planningproblem . init for arg in clause . args ) \n",
+ " expansions = [] \n",
+ " action_list = [] \n",
+ " if name is not None : \n",
+ " for action in self . planningproblem . actions : \n",
+ " if str ( action . name ) == name : \n",
+ " action_list . append ( action ) \n",
+ " else : \n",
+ " action_list = self . planningproblem . actions \n",
+ "\n",
+ " for action in action_list : \n",
+ " for permutation in itertools . permutations ( objects , len ( action . args )): \n",
+ " bindings = unify ( Expr ( action . name , * action . args ), Expr ( action . name , * permutation )) \n",
+ " if bindings is not None : \n",
+ " new_args = [] \n",
+ " for arg in action . args : \n",
+ " if arg in bindings : \n",
+ " new_args . append ( bindings [ arg ]) \n",
+ " else : \n",
+ " new_args . append ( arg ) \n",
+ " new_expr = Expr ( str ( action . name ), * new_args ) \n",
+ " new_preconds = [] \n",
+ " for precond in action . precond : \n",
+ " new_precond_args = [] \n",
+ " for arg in precond . args : \n",
+ " if arg in bindings : \n",
+ " new_precond_args . append ( bindings [ arg ]) \n",
+ " else : \n",
+ " new_precond_args . append ( arg ) \n",
+ " new_precond = Expr ( str ( precond . op ), * new_precond_args ) \n",
+ " new_preconds . append ( new_precond ) \n",
+ " new_effects = [] \n",
+ " for effect in action . effect : \n",
+ " new_effect_args = [] \n",
+ " for arg in effect . args : \n",
+ " if arg in bindings : \n",
+ " new_effect_args . append ( bindings [ arg ]) \n",
+ " else : \n",
+ " new_effect_args . append ( arg ) \n",
+ " new_effect = Expr ( str ( effect . op ), * new_effect_args ) \n",
+ " new_effects . append ( new_effect ) \n",
+ " expansions . append ( Action ( new_expr , new_preconds , new_effects )) \n",
+ "\n",
+ " return expansions \n",
+ "\n",
+ " def find_open_precondition ( self ): \n",
+ " """Find open precondition with the least number of possible actions""" \n",
+ "\n",
+ " number_of_ways = dict () \n",
+ " actions_for_precondition = dict () \n",
+ " for element in self . agenda : \n",
+ " open_precondition = element [ 0 ] \n",
+ " possible_actions = list ( self . actions ) + self . expanded_actions \n",
+ " for action in possible_actions : \n",
+ " for effect in action . effect : \n",
+ " if effect == open_precondition : \n",
+ " if open_precondition in number_of_ways : \n",
+ " number_of_ways [ open_precondition ] += 1 \n",
+ " actions_for_precondition [ open_precondition ] . append ( action ) \n",
+ " else : \n",
+ " number_of_ways [ open_precondition ] = 1 \n",
+ " actions_for_precondition [ open_precondition ] = [ action ] \n",
+ "\n",
+ " number = sorted ( number_of_ways , key = number_of_ways . __getitem__ ) \n",
+ " \n",
+ " for k , v in number_of_ways . items (): \n",
+ " if v == 0 : \n",
+ " return None , None , None \n",
+ "\n",
+ " act1 = None \n",
+ " for element in self . agenda : \n",
+ " if element [ 0 ] == number [ 0 ]: \n",
+ " act1 = element [ 1 ] \n",
+ " break \n",
+ "\n",
+ " if number [ 0 ] in self . expanded_actions : \n",
+ " self . expanded_actions . remove ( number [ 0 ]) \n",
+ "\n",
+ " return number [ 0 ], act1 , actions_for_precondition [ number [ 0 ]] \n",
+ "\n",
+ " def find_action_for_precondition ( self , oprec ): \n",
+ " """Find action for a given precondition""" \n",
+ "\n",
+ " # either \n",
+ " # choose act0 E Actions such that act0 achieves G \n",
+ " for action in self . actions : \n",
+ " for effect in action . effect : \n",
+ " if effect == oprec : \n",
+ " return action , 0 \n",
+ "\n",
+ " # or \n",
+ " # choose act0 E Actions such that act0 achieves G \n",
+ " for action in self . planningproblem . actions : \n",
+ " for effect in action . effect : \n",
+ " if effect . op == oprec . op : \n",
+ " bindings = unify ( effect , oprec ) \n",
+ " if bindings is None : \n",
+ " break \n",
+ " return action , bindings \n",
+ "\n",
+ " def generate_expr ( self , clause , bindings ): \n",
+ " """Generate atomic expression from generic expression given variable bindings""" \n",
+ "\n",
+ " new_args = [] \n",
+ " for arg in clause . args : \n",
+ " if arg in bindings : \n",
+ " new_args . append ( bindings [ arg ]) \n",
+ " else : \n",
+ " new_args . append ( arg ) \n",
+ "\n",
+ " try : \n",
+ " return Expr ( str ( clause . name ), * new_args ) \n",
+ " except : \n",
+ " return Expr ( str ( clause . op ), * new_args ) \n",
+ " \n",
+ " def generate_action_object ( self , action , bindings ): \n",
+ " """Generate action object given a generic action andvariable bindings""" \n",
+ "\n",
+ " # if bindings is 0, it means the action already exists in self.actions \n",
+ " if bindings == 0 : \n",
+ " return action \n",
+ "\n",
+ " # bindings cannot be None \n",
+ " else : \n",
+ " new_expr = self . generate_expr ( action , bindings ) \n",
+ " new_preconds = [] \n",
+ " for precond in action . precond : \n",
+ " new_precond = self . generate_expr ( precond , bindings ) \n",
+ " new_preconds . append ( new_precond ) \n",
+ " new_effects = [] \n",
+ " for effect in action . effect : \n",
+ " new_effect = self . generate_expr ( effect , bindings ) \n",
+ " new_effects . append ( new_effect ) \n",
+ " return Action ( new_expr , new_preconds , new_effects ) \n",
+ "\n",
+ " def cyclic ( self , graph ): \n",
+ " """Check cyclicity of a directed graph""" \n",
+ "\n",
+ " new_graph = dict () \n",
+ " for element in graph : \n",
+ " if element [ 0 ] in new_graph : \n",
+ " new_graph [ element [ 0 ]] . append ( element [ 1 ]) \n",
+ " else : \n",
+ " new_graph [ element [ 0 ]] = [ element [ 1 ]] \n",
+ "\n",
+ " path = set () \n",
+ "\n",
+ " def visit ( vertex ): \n",
+ " path . add ( vertex ) \n",
+ " for neighbor in new_graph . get ( vertex , ()): \n",
+ " if neighbor in path or visit ( neighbor ): \n",
+ " return True \n",
+ " path . remove ( vertex ) \n",
+ " return False \n",
+ "\n",
+ " value = any ( visit ( v ) for v in new_graph ) \n",
+ " return value \n",
+ "\n",
+ " def add_const ( self , constraint , constraints ): \n",
+ " """Add the constraint to constraints if the resulting graph is acyclic""" \n",
+ "\n",
+ " if constraint [ 0 ] == self . finish or constraint [ 1 ] == self . start : \n",
+ " return constraints \n",
+ "\n",
+ " new_constraints = set ( constraints ) \n",
+ " new_constraints . add ( constraint ) \n",
+ "\n",
+ " if self . cyclic ( new_constraints ): \n",
+ " return constraints \n",
+ " return new_constraints \n",
+ "\n",
+ " def is_a_threat ( self , precondition , effect ): \n",
+ " """Check if effect is a threat to precondition""" \n",
+ "\n",
+ " if ( str ( effect . op ) == 'Not' + str ( precondition . op )) or ( 'Not' + str ( effect . op ) == str ( precondition . op )): \n",
+ " if effect . args == precondition . args : \n",
+ " return True \n",
+ " return False \n",
+ "\n",
+ " def protect ( self , causal_link , action , constraints ): \n",
+ " """Check and resolve threats by promotion or demotion""" \n",
+ "\n",
+ " threat = False \n",
+ " for effect in action . effect : \n",
+ " if self . is_a_threat ( causal_link [ 1 ], effect ): \n",
+ " threat = True \n",
+ " break \n",
+ "\n",
+ " if action != causal_link [ 0 ] and action != causal_link [ 2 ] and threat : \n",
+ " # try promotion \n",
+ " new_constraints = set ( constraints ) \n",
+ " new_constraints . add (( action , causal_link [ 0 ])) \n",
+ " if not self . cyclic ( new_constraints ): \n",
+ " constraints = self . add_const (( action , causal_link [ 0 ]), constraints ) \n",
+ " else : \n",
+ " # try demotion \n",
+ " new_constraints = set ( constraints ) \n",
+ " new_constraints . add (( causal_link [ 2 ], action )) \n",
+ " if not self . cyclic ( new_constraints ): \n",
+ " constraints = self . add_const (( causal_link [ 2 ], action ), constraints ) \n",
+ " else : \n",
+ " # both promotion and demotion fail \n",
+ " print ( 'Unable to resolve a threat caused by' , action , 'onto' , causal_link ) \n",
+ " return \n",
+ " return constraints \n",
+ "\n",
+ " def convert ( self , constraints ): \n",
+ " """Convert constraints into a dict of Action to set orderings""" \n",
+ "\n",
+ " graph = dict () \n",
+ " for constraint in constraints : \n",
+ " if constraint [ 0 ] in graph : \n",
+ " graph [ constraint [ 0 ]] . add ( constraint [ 1 ]) \n",
+ " else : \n",
+ " graph [ constraint [ 0 ]] = set () \n",
+ " graph [ constraint [ 0 ]] . add ( constraint [ 1 ]) \n",
+ " return graph \n",
+ "\n",
+ " def toposort ( self , graph ): \n",
+ " """Generate topological ordering of constraints""" \n",
+ "\n",
+ " if len ( graph ) == 0 : \n",
+ " return \n",
+ "\n",
+ " graph = graph . copy () \n",
+ "\n",
+ " for k , v in graph . items (): \n",
+ " v . discard ( k ) \n",
+ "\n",
+ " extra_elements_in_dependencies = _reduce ( set . union , graph . values ()) - set ( graph . keys ()) \n",
+ "\n",
+ " graph . update ({ element : set () for element in extra_elements_in_dependencies }) \n",
+ " while True : \n",
+ " ordered = set ( element for element , dependency in graph . items () if len ( dependency ) == 0 ) \n",
+ " if not ordered : \n",
+ " break \n",
+ " yield ordered \n",
+ " graph = { element : ( dependency - ordered ) for element , dependency in graph . items () if element not in ordered } \n",
+ " if len ( graph ) != 0 : \n",
+ " raise ValueError ( 'The graph is not acyclic and cannot be linearly ordered' ) \n",
+ "\n",
+ " def display_plan ( self ): \n",
+ " """Display causal links, constraints and the plan""" \n",
+ "\n",
+ " print ( 'Causal Links' ) \n",
+ " for causal_link in self . causal_links : \n",
+ " print ( causal_link ) \n",
+ "\n",
+ " print ( ' \\n Constraints' ) \n",
+ " for constraint in self . constraints : \n",
+ " print ( constraint [ 0 ], '<' , constraint [ 1 ]) \n",
+ "\n",
+ " print ( ' \\n Partial Order Plan' ) \n",
+ " print ( list ( reversed ( list ( self . toposort ( self . convert ( self . constraints )))))) \n",
+ "\n",
+ " def execute ( self , display = True ): \n",
+ " """Execute the algorithm""" \n",
+ "\n",
+ " step = 1 \n",
+ " self . tries = 1 \n",
+ " while len ( self . agenda ) > 0 : \n",
+ " step += 1 \n",
+ " # select <G, act1> from Agenda \n",
+ " try : \n",
+ " G , act1 , possible_actions = self . find_open_precondition () \n",
+ " except IndexError : \n",
+ " print ( 'Probably Wrong' ) \n",
+ " break \n",
+ "\n",
+ " act0 = possible_actions [ 0 ] \n",
+ " # remove <G, act1> from Agenda \n",
+ " self . agenda . remove (( G , act1 )) \n",
+ "\n",
+ " # For actions with variable number of arguments, use least commitment principle \n",
+ " # act0_temp, bindings = self.find_action_for_precondition(G) \n",
+ " # act0 = self.generate_action_object(act0_temp, bindings) \n",
+ "\n",
+ " # Actions = Actions U {act0} \n",
+ " self . actions . add ( act0 ) \n",
+ "\n",
+ " # Constraints = add_const(start < act0, Constraints) \n",
+ " self . constraints = self . add_const (( self . start , act0 ), self . constraints ) \n",
+ "\n",
+ " # for each CL E CausalLinks do \n",
+ " # Constraints = protect(CL, act0, Constraints) \n",
+ " for causal_link in self . causal_links : \n",
+ " self . constraints = self . protect ( causal_link , act0 , self . constraints ) \n",
+ "\n",
+ " # Agenda = Agenda U {<P, act0>: P is a precondition of act0} \n",
+ " for precondition in act0 . precond : \n",
+ " self . agenda . add (( precondition , act0 )) \n",
+ "\n",
+ " # Constraints = add_const(act0 < act1, Constraints) \n",
+ " self . constraints = self . add_const (( act0 , act1 ), self . constraints ) \n",
+ "\n",
+ " # CausalLinks U {<act0, G, act1>} \n",
+ " if ( act0 , G , act1 ) not in self . causal_links : \n",
+ " self . causal_links . append (( act0 , G , act1 )) \n",
+ "\n",
+ " # for each A E Actions do \n",
+ " # Constraints = protect(<act0, G, act1>, A, Constraints) \n",
+ " for action in self . actions : \n",
+ " self . constraints = self . protect (( act0 , G , act1 ), action , self . constraints ) \n",
+ "\n",
+ " if step > 200 : \n",
+ " print ( 'Couldn \\' t find a solution' ) \n",
+ " return None , None \n",
+ "\n",
+ " if display : \n",
+ " self . display_plan () \n",
+ " else : \n",
+ " return self . constraints , self . causal_links \n",
+ " \n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "psource(PartialOrderPlanner)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We will first describe the data-structures and helper methods used, followed by the algorithm used to find a partial-order plan."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Each plan has the following four components:\n",
+ "\n",
+ "1. **`actions`**: a set of actions that make up the steps of the plan.\n",
+    "`actions` is always a subset of the set of possible actions of the given planning problem (`planningproblem.actions`). \n",
+ "The `start` and `finish` actions are dummy actions defined to bring uniformity to the problem. The `start` action has no preconditions and its effects constitute the initial state of the planning problem. \n",
+ "The `finish` action has no effects and its preconditions constitute the goal state of the planning problem.\n",
+ "The empty plan consists of just these two dummy actions.\n",
+ "2. **`constraints`**: a set of temporal constraints that define the order of performing the actions relative to each other.\n",
+ "`constraints` does not define a linear ordering, rather it usually represents a directed graph which is also acyclic if the plan is consistent.\n",
+ "Each ordering is of the form A < B, which reads as \"A before B\" and means that action A _must_ be executed sometime before action B, but not necessarily immediately before.\n",
+ "`constraints` stores these as a set of tuples `(Action(A), Action(B))` which is interpreted as given above.\n",
+ "A constraint cannot be added to `constraints` if it breaks the acyclicity of the existing graph.\n",
+ "3. **`causal_links`**: a set of causal-links. \n",
+ "A causal link between two actions _A_ and _B_ in the plan is written as _A_ --_p_--> _B_ and is read as \"A achieves p for B\".\n",
+    "This implies that _p_ is an effect of _A_ and a precondition of _B_.\n",
+ "It also asserts that _p_ must remain true from the time of action _A_ to the time of action _B_.\n",
+ "Any violation of this rule is called a threat and must be resolved immediately by adding suitable ordering constraints.\n",
+ "`causal_links` stores this information as tuples `(Action(A), precondition(p), Action(B))` which is interpreted as given above.\n",
+ "Causal-links can also be called **protection-intervals**, because the link _A_ --_p_--> _B_ protects _p_ from being negated over the interval from _A_ to _B_.\n",
+ "4. **`agenda`**: a set of open-preconditions.\n",
+ "A precondition is open if it is not achieved by some action in the plan.\n",
+ "Planners will work to reduce the set of open preconditions to the empty set, without introducing a contradiction.\n",
+    "`agenda` stores this information as tuples `(precondition(p), Action(A))` where p is a precondition of the action A.\n",
+ "\n",
+ "A **consistent plan** is a plan in which there are no cycles in the ordering constraints and no conflicts with the causal-links.\n",
+ "A consistent plan with no open preconditions is a **solution**.\n",
+ " \n",
+ " \n",
+ "Let's briefly glance over the helper functions before going into the actual algorithm.\n",
+ " \n",
+    "**`expand_actions`**: generates all possible actions with variable bindings, for use in the heuristic that selects an open precondition.\n",
+ " \n",
+ "**`find_open_precondition`**: finds a precondition from the agenda with the least number of actions that fulfil that precondition.\n",
+ "This heuristic helps form mandatory ordering constraints and causal-links to further simplify the problem and reduce the probability of encountering a threat.\n",
+ " \n",
+ "**`find_action_for_precondition`**: finds an action that fulfils the given precondition along with the absolutely necessary variable bindings in accordance with the principle of _least commitment_.\n",
+ "In case of multiple possible actions, the action with the least number of effects is chosen to minimize the chances of encountering a threat.\n",
+ " \n",
+ "**`cyclic`**: checks if a directed graph is cyclic.\n",
+ " \n",
+ "**`add_const`**: adds `constraint` to `constraints` if the newly formed graph is acyclic and returns `constraints` otherwise.\n",
+ " \n",
+ "**`is_a_threat`**: checks if the given `effect` negates the given `precondition`.\n",
+ " \n",
+ "**`protect`**: checks if the given `action` poses a threat to the given `causal_link`.\n",
+ "If so, the threat is resolved by either promotion or demotion, whichever generates acyclic temporal constraints.\n",
+    "If neither promotion nor demotion works, the chosen action is not the correct fit or the planning problem cannot be solved at all.\n",
+ " \n",
+ "**`convert`**: converts a graph from a list of edges to an `Action` : `set` mapping, for use in topological sorting.\n",
+ " \n",
+ "**`toposort`**: a generator function that generates a topological ordering of a given graph as a list of sets.\n",
+ "Each set contains an action or several actions.\n",
+    "If a set has more than one action in it, any permutation of those actions also produces a valid plan.\n",
+ " \n",
+ "**`display_plan`**: displays the `causal_links`, `constraints` and the partial order plan generated from `toposort`.\n",
+ " "
+ ]
+ },
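+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "To make the constraint representation described above concrete, here is a toy illustration (a sketch, not the planner's own `cyclic` method) of ordering constraints stored as `(before, after)` tuples, together with a depth-first cycle check of the same flavour:\n",
+    "\n",
+    "```python\n",
+    "constraints = {('Start', 'RightSock'), ('RightSock', 'RightShoe'), ('RightShoe', 'Finish')}\n",
+    "\n",
+    "def has_cycle(edges):\n",
+    "    # build an adjacency list and run a DFS that tracks the current path\n",
+    "    graph = {}\n",
+    "    for a, b in edges:\n",
+    "        graph.setdefault(a, []).append(b)\n",
+    "    on_path, finished = set(), set()\n",
+    "    def visit(v):\n",
+    "        if v in on_path:\n",
+    "            return True  # back edge: a cycle exists\n",
+    "        if v in finished:\n",
+    "            return False\n",
+    "        on_path.add(v)\n",
+    "        if any(visit(n) for n in graph.get(v, [])):\n",
+    "            return True\n",
+    "        on_path.remove(v)\n",
+    "        finished.add(v)\n",
+    "        return False\n",
+    "    return any(visit(v) for v in list(graph))\n",
+    "\n",
+    "has_cycle(constraints), has_cycle(constraints | {('Finish', 'Start')})  # (False, True)\n",
+    "```"
+   ]
+  },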
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The **`execute`** method executes the algorithm, which is summarized below:\n",
+ " \n",
+ "1. An open precondition is selected (a sub-goal that we want to achieve).\n",
+ "2. An action that fulfils the open precondition is chosen.\n",
+ "3. Temporal constraints are updated.\n",
+ "4. Existing causal links are protected. Protection is a method that checks if the causal links conflict\n",
+ " and if they do, temporal constraints are added to fix the threats.\n",
+ "5. The set of open preconditions is updated.\n",
+ "6. Temporal constraints of the selected action and the next action are established.\n",
+ "7. A new causal link is added between the selected action and the owner of the open precondition.\n",
+ "8. The set of new causal links is checked for threats and if found, the threat is removed by either promotion or demotion.\n",
+ " If promotion or demotion is unable to solve the problem, the planning problem cannot be solved with the current sequence of actions\n",
+ " or it may not be solvable at all.\n",
+ "9. These steps are repeated until the set of open preconditions is empty."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "A partial-order plan can be used to generate different valid total-order plans.\n",
+ "This step is called **linearization** of the partial-order plan.\n",
+    "All possible linearizations of a partial-order plan for `socks_and_shoes` look like this.\n",
+ " \n",
+ "\n",
+ " \n",
+ "Linearization can be carried out in many ways, but the most efficient way is to represent the set of temporal constraints as a directed graph.\n",
+    "We can easily see that the graph must also be acyclic, since a cycle in the constraints means that they are inconsistent.\n",
+ "This acyclicity is enforced by the `add_const` method, which adds a new constraint only if the acyclicity of the existing graph is not violated.\n",
+ "The `protect` method also checks for acyclicity of the newly-added temporal constraints to make a decision between promotion and demotion in case of a threat.\n",
+ "This property of a graph created from the temporal constraints of a valid partial-order plan allows us to use topological sort to order the constraints linearly.\n",
+ "A topological sort may produce several different valid solutions for a given directed acyclic graph."
+ ]
+ },
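+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "As a small, self-contained sketch of this layered topological sort (an illustration, not the planner's `toposort` itself), consider the ordering constraints of the socks-and-shoes plan; each yielded set contains actions that may be performed in any relative order:\n",
+    "\n",
+    "```python\n",
+    "def topo_layers(edges):\n",
+    "    # Kahn-style layering: repeatedly emit all nodes with no remaining predecessors\n",
+    "    succ, indegree, nodes = {}, {}, set()\n",
+    "    for a, b in edges:\n",
+    "        succ.setdefault(a, set()).add(b)\n",
+    "        indegree[b] = indegree.get(b, 0) + 1\n",
+    "        nodes |= {a, b}\n",
+    "    layer = {n for n in nodes if indegree.get(n, 0) == 0}\n",
+    "    while layer:\n",
+    "        yield layer\n",
+    "        next_layer = set()\n",
+    "        for n in layer:\n",
+    "            for m in succ.get(n, ()):\n",
+    "                indegree[m] -= 1\n",
+    "                if indegree[m] == 0:\n",
+    "                    next_layer.add(m)\n",
+    "        layer = next_layer\n",
+    "\n",
+    "edges = {('Start', 'LeftSock'), ('LeftSock', 'LeftShoe'), ('LeftShoe', 'Finish'),\n",
+    "         ('Start', 'RightSock'), ('RightSock', 'RightShoe'), ('RightShoe', 'Finish')}\n",
+    "list(topo_layers(edges))  # [{'Start'}, {'LeftSock', 'RightSock'}, {'LeftShoe', 'RightShoe'}, {'Finish'}]\n",
+    "```"
+   ]
+  },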
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now that we know how `PartialOrderPlanner` works, let's solve a few problems using it."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Causal Links\n",
+ "(Action(PutOn(Spare, Axle)), At(Spare, Axle), Action(Finish))\n",
+ "(Action(Start), Tire(Spare), Action(PutOn(Spare, Axle)))\n",
+ "(Action(Remove(Flat, Axle)), NotAt(Flat, Axle), Action(PutOn(Spare, Axle)))\n",
+ "(Action(Start), At(Flat, Axle), Action(Remove(Flat, Axle)))\n",
+ "(Action(Remove(Spare, Trunk)), At(Spare, Ground), Action(PutOn(Spare, Axle)))\n",
+ "(Action(Start), At(Spare, Trunk), Action(Remove(Spare, Trunk)))\n",
+ "(Action(Remove(Flat, Axle)), At(Flat, Ground), Action(Finish))\n",
+ "\n",
+ "Constraints\n",
+ "Action(Remove(Flat, Axle)) < Action(PutOn(Spare, Axle))\n",
+ "Action(Start) < Action(Finish)\n",
+ "Action(Remove(Spare, Trunk)) < Action(PutOn(Spare, Axle))\n",
+ "Action(Start) < Action(Remove(Spare, Trunk))\n",
+ "Action(Start) < Action(Remove(Flat, Axle))\n",
+ "Action(Remove(Flat, Axle)) < Action(Finish)\n",
+ "Action(PutOn(Spare, Axle)) < Action(Finish)\n",
+ "Action(Start) < Action(PutOn(Spare, Axle))\n",
+ "\n",
+ "Partial Order Plan\n",
+ "[{Action(Start)}, {Action(Remove(Flat, Axle)), Action(Remove(Spare, Trunk))}, {Action(PutOn(Spare, Axle))}, {Action(Finish)}]\n"
+ ]
+ }
+ ],
+ "source": [
+ "st = spare_tire()\n",
+ "pop = PartialOrderPlanner(st)\n",
+ "pop.execute()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We observe that in the given partial order plan, Remove(Flat, Axle) and Remove(Spare, Trunk) are in the same set.\n",
+ "This means that the order of performing these actions does not affect the final outcome.\n",
+ "That aside, we also see that the PutOn(Spare, Axle) action has to be performed after both the Remove actions are complete, which seems logically consistent."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Causal Links\n",
+ "(Action(FromTable(C, B)), On(C, B), Action(Finish))\n",
+ "(Action(FromTable(B, A)), On(B, A), Action(Finish))\n",
+ "(Action(Start), OnTable(B), Action(FromTable(B, A)))\n",
+ "(Action(Start), OnTable(C), Action(FromTable(C, B)))\n",
+ "(Action(Start), Clear(C), Action(FromTable(C, B)))\n",
+ "(Action(Start), Clear(A), Action(FromTable(B, A)))\n",
+ "(Action(ToTable(A, B)), Clear(B), Action(FromTable(C, B)))\n",
+ "(Action(Start), On(A, B), Action(ToTable(A, B)))\n",
+ "(Action(ToTable(A, B)), Clear(B), Action(FromTable(B, A)))\n",
+ "(Action(Start), Clear(A), Action(ToTable(A, B)))\n",
+ "\n",
+ "Constraints\n",
+ "Action(Start) < Action(FromTable(C, B))\n",
+ "Action(FromTable(B, A)) < Action(FromTable(C, B))\n",
+ "Action(Start) < Action(FromTable(B, A))\n",
+ "Action(Start) < Action(ToTable(A, B))\n",
+ "Action(Start) < Action(Finish)\n",
+ "Action(FromTable(B, A)) < Action(Finish)\n",
+ "Action(FromTable(C, B)) < Action(Finish)\n",
+ "Action(ToTable(A, B)) < Action(FromTable(B, A))\n",
+ "Action(ToTable(A, B)) < Action(FromTable(C, B))\n",
+ "\n",
+ "Partial Order Plan\n",
+ "[{Action(Start)}, {Action(ToTable(A, B))}, {Action(FromTable(B, A))}, {Action(FromTable(C, B))}, {Action(Finish)}]\n"
+ ]
+ }
+ ],
+ "source": [
+ "sbw = simple_blocks_world()\n",
+ "pop = PartialOrderPlanner(sbw)\n",
+ "pop.execute()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "collapsed": true
+ },
+ "source": [
+    "We see that this plan does not have flexibility in selecting actions, i.e., the actions must be performed in this order and this order only to successfully reach the goal state."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Causal Links\n",
+ "(Action(RightShoe), RightShoeOn, Action(Finish))\n",
+ "(Action(LeftShoe), LeftShoeOn, Action(Finish))\n",
+ "(Action(LeftSock), LeftSockOn, Action(LeftShoe))\n",
+ "(Action(RightSock), RightSockOn, Action(RightShoe))\n",
+ "\n",
+ "Constraints\n",
+ "Action(LeftSock) < Action(LeftShoe)\n",
+ "Action(RightSock) < Action(RightShoe)\n",
+ "Action(Start) < Action(RightShoe)\n",
+ "Action(Start) < Action(Finish)\n",
+ "Action(LeftShoe) < Action(Finish)\n",
+ "Action(Start) < Action(RightSock)\n",
+ "Action(Start) < Action(LeftShoe)\n",
+ "Action(Start) < Action(LeftSock)\n",
+ "Action(RightShoe) < Action(Finish)\n",
+ "\n",
+ "Partial Order Plan\n",
+ "[{Action(Start)}, {Action(LeftSock), Action(RightSock)}, {Action(LeftShoe), Action(RightShoe)}, {Action(Finish)}]\n"
+ ]
+ }
+ ],
+ "source": [
+ "ss = socks_and_shoes()\n",
+ "pop = PartialOrderPlanner(ss)\n",
+ "pop.execute()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "collapsed": true
+ },
+ "source": [
+    "This plan again doesn't impose strict constraints on choosing between the socks or the shoes.\n",
+    "As long as both socks are worn before both shoes, we are fine.\n",
+    "Notice, however, that there is one valid solution,\n",
+ " \n",
+ "LeftSock -> LeftShoe -> RightSock -> RightShoe\n",
+ " \n",
+    "that the algorithm could not find, as it cannot be represented as a general partial-order plan but only as a specific total-order solution."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Runtime differences\n",
+ "Let's briefly take a look at the running time of all the three algorithms on the `socks_and_shoes` problem."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ss = socks_and_shoes()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "198 µs ± 3.53 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%timeit\n",
+ "GraphPlan(ss).execute()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "844 µs ± 23.8 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%timeit\n",
+ "Linearize(ss).execute()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "258 µs ± 4.03 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%timeit\n",
+ "PartialOrderPlanner(ss).execute(display=False)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We observe that `GraphPlan` is about 4 times faster than `Linearize` because `Linearize` essentially runs a `GraphPlan` subroutine under the hood and then carries out some transformations on the solved planning-graph.\n",
+ " \n",
+ "We also find that `GraphPlan` is slightly faster than `PartialOrderPlanner`, but this is mainly due to the `expand_actions` method in `PartialOrderPlanner` that slows it down as it generates all possible permutations of actions and variable bindings.\n",
+ " \n",
+    "Without heuristic functions, `PartialOrderPlanner` would be at least as fast as `GraphPlan`, if not faster, but would have a higher tendency to encounter threats and conflicts, which might take additional time to resolve.\n",
+ " \n",
+ "Different planning algorithms work differently for different problems."
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.5.3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}
diff --git a/planning_total_order_planner.ipynb b/planning_total_order_planner.ipynb
new file mode 100644
index 000000000..b94941ece
--- /dev/null
+++ b/planning_total_order_planner.ipynb
@@ -0,0 +1,341 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### TOTAL ORDER PLANNER\n",
+ "\n",
+ "In mathematical terminology, **total order**, **linear order** or **simple order** refers to a set *X* which is said to be totally ordered under ≤ if the following statements hold for all *a*, *b* and *c* in *X*:\n",
+ " \n",
+ "If *a* ≤ *b* and *b* ≤ *a*, then *a* = *b* (antisymmetry).\n",
+ " \n",
+ "If *a* ≤ *b* and *b* ≤ *c*, then *a* ≤ *c* (transitivity).\n",
+ " \n",
+ "*a* ≤ *b* or *b* ≤ *a* (connex relation).\n",
+ "\n",
+ " \n",
+ "In simpler terms, a total order plan is a linear ordering of actions to be taken to reach the goal state.\n",
+ "There may be several different total-order plans for a particular goal depending on the problem.\n",
+ " \n",
+ " \n",
+ "In the module, the `Linearize` class solves problems using this paradigm.\n",
+    "At its core, `Linearize` uses a solved planning graph from `GraphPlan` and finds a valid total-order solution for it.\n",
+ "Let's have a look at the class."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from planning import *\n",
+ "from notebook import psource"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "class Linearize : \n",
+ "\n",
+ " def __init__ ( self , planningproblem ): \n",
+ " self . planningproblem = planningproblem \n",
+ "\n",
+ " def filter ( self , solution ): \n",
+ " """Filter out persistence actions from a solution""" \n",
+ "\n",
+ " new_solution = [] \n",
+ " for section in solution [ 0 ]: \n",
+ " new_section = [] \n",
+ " for operation in section : \n",
+ " if not ( operation . op [ 0 ] == 'P' and operation . op [ 1 ] . isupper ()): \n",
+ " new_section . append ( operation ) \n",
+ " new_solution . append ( new_section ) \n",
+ " return new_solution \n",
+ "\n",
+ " def orderlevel ( self , level , planningproblem ): \n",
+ " """Return valid linear order of actions for a given level""" \n",
+ "\n",
+ " for permutation in itertools . permutations ( level ): \n",
+ " temp = copy . deepcopy ( planningproblem ) \n",
+ " count = 0 \n",
+ " for action in permutation : \n",
+ " try : \n",
+ " temp . act ( action ) \n",
+ " count += 1 \n",
+ " except : \n",
+ " count = 0 \n",
+ " temp = copy . deepcopy ( planningproblem ) \n",
+ " break \n",
+ " if count == len ( permutation ): \n",
+ " return list ( permutation ), temp \n",
+ " return None \n",
+ "\n",
+ " def execute ( self ): \n",
+ " """Finds total-order solution for a planning graph""" \n",
+ "\n",
+ " graphplan_solution = GraphPlan ( self . planningproblem ) . execute () \n",
+ " filtered_solution = self . filter ( graphplan_solution ) \n",
+ " ordered_solution = [] \n",
+ " planningproblem = self . planningproblem \n",
+ " for level in filtered_solution : \n",
+ " level_solution , planningproblem = self . orderlevel ( level , planningproblem ) \n",
+ " for element in level_solution : \n",
+ " ordered_solution . append ( element ) \n",
+ "\n",
+ " return ordered_solution \n",
+ " \n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "psource(Linearize)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The `filter` method removes the persistence actions (if any) from the planning graph representation.\n",
+ " \n",
+    "The `orderlevel` method finds a valid total ordering of the actions in a specified level of the planning graph, given the problem state reached after the previous level.\n",
+ " \n",
+ "The `execute` method sequentially calls `orderlevel` for all the levels in the planning-graph and returns the final total-order solution.\n",
+ " \n",
+ " \n",
+ "Let's look at some examples."
+ ]
+ },
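+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Before the examples, the permutation search inside `orderlevel` can be sketched as follows (a simplified illustration; the hypothetical `applicable` and `apply_action` callables stand in for the exception-raising `planningproblem.act` used by the real method):\n",
+    "\n",
+    "```python\n",
+    "import itertools\n",
+    "\n",
+    "def order_level(level, state, applicable, apply_action):\n",
+    "    # try every ordering of the level's actions until one can be executed end to end\n",
+    "    for permutation in itertools.permutations(level):\n",
+    "        current = state\n",
+    "        for action in permutation:\n",
+    "            if not applicable(current, action):\n",
+    "                break\n",
+    "            current = apply_action(current, action)\n",
+    "        else:\n",
+    "            return list(permutation), current\n",
+    "    return None\n",
+    "```"
+   ]
+  },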
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[Load(C1, P1, SFO),\n",
+ " Fly(P1, SFO, JFK),\n",
+ " Load(C2, P2, JFK),\n",
+ " Fly(P2, JFK, SFO),\n",
+ " Unload(C2, P2, SFO),\n",
+ " Unload(C1, P1, JFK)]"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# total-order solution for air_cargo problem\n",
+ "Linearize(air_cargo()).execute()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[Remove(Spare, Trunk), Remove(Flat, Axle), PutOn(Spare, Axle)]"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# total-order solution for spare_tire problem\n",
+ "Linearize(spare_tire()).execute()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[MoveToTable(C, A), Move(B, Table, C), Move(A, Table, B)]"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# total-order solution for three_block_tower problem\n",
+ "Linearize(three_block_tower()).execute()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[ToTable(A, B), FromTable(B, A), FromTable(C, B)]"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# total-order solution for simple_blocks_world problem\n",
+ "Linearize(simple_blocks_world()).execute()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[RightSock, LeftSock, RightShoe, LeftShoe]"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# total-order solution for socks_and_shoes problem\n",
+ "Linearize(socks_and_shoes()).execute()"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.5.3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}
diff --git a/probabilistic_learning.py b/probabilistic_learning.py
new file mode 100644
index 000000000..1138e702d
--- /dev/null
+++ b/probabilistic_learning.py
@@ -0,0 +1,154 @@
+"""Learning probabilistic models. (Chapters 20)"""
+
+import heapq
+
+from utils import weighted_sampler, product, gaussian
+
+
+class CountingProbDist:
+ """
+ A probability distribution formed by observing and counting examples.
+ If p is an instance of this class and o is an observed value, then
+ there are 3 main operations:
+ p.add(o) increments the count for observation o by 1.
+ p.sample() returns a random element from the distribution.
+ p[o] returns the probability for o (as in a regular ProbDist).
+ """
+
+ def __init__(self, observations=None, default=0):
+ """
+ Create a distribution, and optionally add in some observations.
+ By default this is an unsmoothed distribution, but saying default=1,
+ for example, gives you add-one smoothing.
+ """
+ if observations is None:
+ observations = []
+ self.dictionary = {}
+ self.n_obs = 0
+ self.default = default
+ self.sampler = None
+
+ for o in observations:
+ self.add(o)
+
+ def add(self, o):
+ """Add an observation o to the distribution."""
+ self.smooth_for(o)
+ self.dictionary[o] += 1
+ self.n_obs += 1
+ self.sampler = None
+
+ def smooth_for(self, o):
+ """
+ Include o among the possible observations, whether or not
+ it's been observed yet.
+ """
+ if o not in self.dictionary:
+ self.dictionary[o] = self.default
+ self.n_obs += self.default
+ self.sampler = None
+
+ def __getitem__(self, item):
+ """Return an estimate of the probability of item."""
+ self.smooth_for(item)
+ return self.dictionary[item] / self.n_obs
+
+ # (top() and sample() are not used in this module, but elsewhere.)
+
+ def top(self, n):
+ """Return (count, obs) tuples for the n most frequent observations."""
+ return heapq.nlargest(n, [(v, k) for (k, v) in self.dictionary.items()])
+
+ def sample(self):
+ """Return a random sample from the distribution."""
+ if self.sampler is None:
+ self.sampler = weighted_sampler(list(self.dictionary.keys()), list(self.dictionary.values()))
+ return self.sampler()
+
+
+def NaiveBayesLearner(dataset, continuous=True, simple=False):
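+    """Dispatch to a naive Bayes learner: a simple one built from a dict of
+    CountingProbDists (see NaiveBayesSimple), a continuous one (Gaussian input
+    attributes) or the default discrete one (counting attribute values per class)."""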
+ if simple:
+ return NaiveBayesSimple(dataset)
+ if continuous:
+ return NaiveBayesContinuous(dataset)
+ else:
+ return NaiveBayesDiscrete(dataset)
+
+
+def NaiveBayesSimple(distribution):
+ """
+ A simple naive bayes classifier that takes as input a dictionary of
+ CountingProbDist objects and classifies items according to these distributions.
+ The input dictionary is in the following form:
+ (ClassName, ClassProb): CountingProbDist
+ """
+ target_dist = {c_name: prob for c_name, prob in distribution.keys()}
+ attr_dists = {c_name: count_prob for (c_name, _), count_prob in distribution.items()}
+
+ def predict(example):
+ """Predict the target value for example. Calculate probabilities for each
+ class and pick the max."""
+
+ def class_probability(target_val):
+ attr_dist = attr_dists[target_val]
+ return target_dist[target_val] * product(attr_dist[a] for a in example)
+
+ return max(target_dist.keys(), key=class_probability)
+
+ return predict
+
+
+def NaiveBayesDiscrete(dataset):
+ """
+ Just count how many times each value of each input attribute
+ occurs, conditional on the target value. Count the different
+ target values too.
+ """
+
+ target_vals = dataset.values[dataset.target]
+ target_dist = CountingProbDist(target_vals)
+ attr_dists = {(gv, attr): CountingProbDist(dataset.values[attr]) for gv in target_vals for attr in dataset.inputs}
+ for example in dataset.examples:
+ target_val = example[dataset.target]
+ target_dist.add(target_val)
+ for attr in dataset.inputs:
+ attr_dists[target_val, attr].add(example[attr])
+
+ def predict(example):
+ """
+ Predict the target value for example. Consider each possible value,
+ and pick the most likely by looking at each attribute independently.
+ """
+
+ def class_probability(target_val):
+ return (target_dist[target_val] * product(attr_dists[target_val, attr][example[attr]]
+ for attr in dataset.inputs))
+
+ return max(target_vals, key=class_probability)
+
+ return predict
+
+
+def NaiveBayesContinuous(dataset):
+ """
+ Count how many times each target value occurs.
+ Also, find the means and deviations of input attribute values for each target value.
+ """
+ means, deviations = dataset.find_means_and_deviations()
+
+ target_vals = dataset.values[dataset.target]
+ target_dist = CountingProbDist(target_vals)
+
+ def predict(example):
+ """Predict the target value for example. Consider each possible value,
+ and pick the most likely by looking at each attribute independently."""
+
+ def class_probability(target_val):
+ prob = target_dist[target_val]
+ for attr in dataset.inputs:
+ prob *= gaussian(means[target_val][attr], deviations[target_val][attr], example[attr])
+ return prob
+
+ return max(target_vals, key=class_probability)
+
+ return predict
diff --git a/probability.ipynb b/probability.ipynb
index 7b1cd3605..fe9643a83 100644
--- a/probability.ipynb
+++ b/probability.ipynb
@@ -2,55 +2,243 @@
"cells": [
{
"cell_type": "markdown",
- "metadata": {
- "collapsed": false
- },
+ "metadata": {},
"source": [
"# Probability \n",
"\n",
- "This IPy notebook acts as supporting material for **Chapter 13 Quantifying Uncertainty**, **Chapter 14 Probabilistic Reasoning** and **Chapter 15 Probabilistic Reasoning over Time** of the book* Artificial Intelligence: A Modern Approach*. This notebook makes use of the implementations in probability.py module. Let us import everything from the probability module. It might be helpful to view the source of some of our implementations. Please refer to the Introductory IPy file for more details on how to do so."
+    "This IPy notebook acts as supporting material for topics covered in **Chapter 13 Quantifying Uncertainty**, **Chapter 14 Probabilistic Reasoning**, **Chapter 15 Probabilistic Reasoning over Time**, **Chapter 16 Making Simple Decisions** and parts of **Chapter 25 Robotics** of the book *Artificial Intelligence: A Modern Approach*. This notebook makes use of the implementations in the probability.py module. Let us import everything from the probability module. It might be helpful to view the source of some of our implementations. Please refer to the Introductory IPy file for more details on how to do so."
]
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true
- },
+ "execution_count": 1,
+ "metadata": {},
"outputs": [],
"source": [
- "from probability import *"
+ "from probability import *\n",
+ "from utils import print_table\n",
+ "from notebook import psource, pseudocode, heatmap"
]
},
{
"cell_type": "markdown",
- "metadata": {
- "collapsed": true
- },
+ "metadata": {},
+ "source": [
+ "## CONTENTS\n",
+ "- Probability Distribution\n",
+ " - Joint probability distribution\n",
+ " - Inference using full joint distributions\n",
+ " \n",
+ "- Bayesian Networks\n",
+ " - BayesNode\n",
+ " - BayesNet\n",
+ " - Exact Inference in Bayesian Networks\n",
+ " - Enumeration\n",
+ " - Variable elimination\n",
+ " - Approximate Inference in Bayesian Networks\n",
+ " - Prior sample\n",
+ " - Rejection sampling\n",
+ " - Likelihood weighting\n",
+ " - Gibbs sampling\n",
+ " \n",
+ "- Hidden Markov Models\n",
+ " - Inference in Hidden Markov Models\n",
+ " - Forward-backward\n",
+ " - Fixed lag smoothing\n",
+ " - Particle filtering\n",
+ " \n",
+ " \n",
+ "- Monte Carlo Localization\n",
+ "- Decision Theoretic Agent\n",
+ "- Information Gathering Agent"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
"source": [
- "## Probability Distribution\n",
+ "## PROBABILITY DISTRIBUTION\n",
"\n",
"Let us begin by specifying discrete probability distributions. The class **ProbDist** defines a discrete probability distribution. We name our random variable and then assign probabilities to the different values of the random variable. Assigning probabilities to the values works similar to that of using a dictionary with keys being the Value and we assign to it the probability. This is possible because of the magic methods **_ _getitem_ _** and **_ _setitem_ _** which store the probabilities in the prob dict of the object. You can keep the source window open alongside while playing with the rest of the code to get a better understanding."
]
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "class ProbDist : \n",
+ " """A discrete probability distribution. You name the random variable \n",
+ " in the constructor, then assign and query probability of values. \n",
+ " >>> P = ProbDist('Flip'); P['H'], P['T'] = 0.25, 0.75; P['H'] \n",
+ " 0.25 \n",
+ " >>> P = ProbDist('X', {'lo': 125, 'med': 375, 'hi': 500}) \n",
+ " >>> P['lo'], P['med'], P['hi'] \n",
+ " (0.125, 0.375, 0.5) \n",
+ " """ \n",
+ "\n",
+ " def __init__ ( self , varname = '?' , freqs = None ): \n",
+ " """If freqs is given, it is a dictionary of values - frequency pairs, \n",
+ " then ProbDist is normalized.""" \n",
+ " self . prob = {} \n",
+ " self . varname = varname \n",
+ " self . values = [] \n",
+ " if freqs : \n",
+ " for ( v , p ) in freqs . items (): \n",
+ " self [ v ] = p \n",
+ " self . normalize () \n",
+ "\n",
+ " def __getitem__ ( self , val ): \n",
+ " """Given a value, return P(value).""" \n",
+ " try : \n",
+ " return self . prob [ val ] \n",
+ " except KeyError : \n",
+ " return 0 \n",
+ "\n",
+ " def __setitem__ ( self , val , p ): \n",
+ " """Set P(val) = p.""" \n",
+ " if val not in self . values : \n",
+ " self . values . append ( val ) \n",
+ " self . prob [ val ] = p \n",
+ "\n",
+ " def normalize ( self ): \n",
+ " """Make sure the probabilities of all values sum to 1. \n",
+ " Returns the normalized distribution. \n",
+ " Raises a ZeroDivisionError if the sum of the values is 0.""" \n",
+ " total = sum ( self . prob . values ()) \n",
+ " if not isclose ( total , 1.0 ): \n",
+ " for val in self . prob : \n",
+ " self . prob [ val ] /= total \n",
+ " return self \n",
+ "\n",
+ " def show_approx ( self , numfmt = '{:.3g}' ): \n",
+ " """Show the probabilities rounded and sorted by key, for the \n",
+ " sake of portable doctests.""" \n",
+ " return ', ' . join ([( '{}: ' + numfmt ) . format ( v , p ) \n",
+ " for ( v , p ) in sorted ( self . prob . items ())]) \n",
+ "\n",
+ " def __repr__ ( self ): \n",
+ " return "P({})" . format ( self . varname ) \n",
+ " \n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
"source": [
- "%psource ProbDist"
+ "psource(ProbDist)"
]
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.75"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"p = ProbDist('Flip')\n",
"p['H'], p['T'] = 0.25, 0.75\n",
@@ -61,28 +249,46 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "The first parameter of the constructor **varname** has a default value of '?'. So if the name is not passed it defaults to ?. The keyword argument **freqs** can be a dictionary of values of random variable:probability. These are then normalized such that the probability values sum upto 1 using the **normalize** method."
+    "The first parameter of the constructor **varname** has a default value of '?'. So if the name is not passed, it defaults to '?'. The keyword argument **freqs** can be a dictionary mapping each value of the random variable to its probability (or frequency). These are then normalized so that the probability values sum up to 1, using the **normalize** method."
]
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'?'"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"p = ProbDist(freqs={'low': 125, 'medium': 375, 'high': 500})\n",
- "p.varname\n"
+ "p.varname"
]
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(0.125, 0.375, 0.5)"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"(p['low'], p['medium'], p['high'])"
]
@@ -96,11 +302,20 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['low', 'medium', 'high']"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"p.values"
]
@@ -109,16 +324,25 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "The distribution by default is not normalized if values are added incremently. We can still force normalization by invoking the **normalize** method."
+ "The distribution by default is not normalized if values are added incrementally. We can still force normalization by invoking the **normalize** method."
]
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(50, 114, 64)"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"p = ProbDist('Y')\n",
"p['Cat'] = 50\n",
@@ -129,11 +353,20 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(0.21929824561403508, 0.5, 0.2807017543859649)"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"p.normalize()\n",
"(p['Cat'], p['Dog'], p['Mice'])"
@@ -148,11 +381,20 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'Cat: 0.219, Dog: 0.5, Mice: 0.281'"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"p.show_approx()"
]
@@ -171,35 +413,175 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(8, 10)"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"event = {'A': 10, 'B': 9, 'C': 8}\n",
"variables = ['C', 'A']\n",
- "event_values (event, variables)"
+ "event_values(event, variables)"
]
},
{
"cell_type": "markdown",
- "metadata": {
- "collapsed": true
- },
+ "metadata": {},
"source": [
"_A probability model is completely determined by the joint distribution for all of the random variables._ (**Section 13.3**) The probability module implements these as the class **JointProbDist** which inherits from the **ProbDist** class. This class specifies a discrete probability distribute over a set of variables. "
]
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "class JointProbDist ( ProbDist ): \n",
+ " """A discrete probability distribute over a set of variables. \n",
+ " >>> P = JointProbDist(['X', 'Y']); P[1, 1] = 0.25 \n",
+ " >>> P[1, 1] \n",
+ " 0.25 \n",
+ " >>> P[dict(X=0, Y=1)] = 0.5 \n",
+ " >>> P[dict(X=0, Y=1)] \n",
+ " 0.5""" \n",
+ "\n",
+ " def __init__ ( self , variables ): \n",
+ " self . prob = {} \n",
+ " self . variables = variables \n",
+ " self . vals = defaultdict ( list ) \n",
+ "\n",
+ " def __getitem__ ( self , values ): \n",
+ " """Given a tuple or dict of values, return P(values).""" \n",
+ " values = event_values ( values , self . variables ) \n",
+ " return ProbDist . __getitem__ ( self , values ) \n",
+ "\n",
+ " def __setitem__ ( self , values , p ): \n",
+ " """Set P(values) = p. Values can be a tuple or a dict; it must \n",
+ " have a value for each of the variables in the joint. Also keep track \n",
+ " of the values we have seen so far for each variable.""" \n",
+ " values = event_values ( values , self . variables ) \n",
+ " self . prob [ values ] = p \n",
+ " for var , val in zip ( self . variables , values ): \n",
+ " if val not in self . vals [ var ]: \n",
+ " self . vals [ var ] . append ( val ) \n",
+ "\n",
+ " def values ( self , var ): \n",
+ " """Return the set of possible values for a variable.""" \n",
+ " return self . vals [ var ] \n",
+ "\n",
+ " def __repr__ ( self ): \n",
+ " return "P({})" . format ( self . variables ) \n",
+ " \n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
"source": [
- "%psource JointProbDist"
+ "psource(JointProbDist)"
]
},
{
@@ -213,11 +595,20 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "P(['X', 'Y'])"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"variables = ['X', 'Y']\n",
"j = JointProbDist(variables)\n",
@@ -234,11 +625,20 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(0.2, 0.5)"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"j[1,1] = 0.2\n",
"j[dict(X=0, Y=1)] = 0.5\n",
@@ -255,11 +655,20 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[1, 0]"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"j.values('X')"
]
@@ -274,7 +683,7 @@
"\n",
"This is illustrated in **Section 13.3** of the book. The functions **enumerate_joint** and **enumerate_joint_ask** implement this functionality. Under the hood they implement **Equation 13.9** from the book.\n",
"\n",
- "$$\\textbf{P}(X | \\textbf{e}) = α \\textbf{P}(X, \\textbf{e}) = α \\sum_{y} \\textbf{P}(X, \\textbf{e}, \\textbf{y})$$\n",
+ "$$\\textbf{P}(X | \\textbf{e}) = \\alpha \\textbf{P}(X, \\textbf{e}) = \\alpha \\sum_{y} \\textbf{P}(X, \\textbf{e}, \\textbf{y})$$\n",
"\n",
"Here **α** is the normalizing factor. **X** is our query variable and **e** is the evidence. According to the equation we enumerate on the remaining variables **y** (not in evidence or query variable) i.e. all possible combinations of **y**\n",
"\n",
@@ -283,10 +692,8 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
+ "execution_count": 15,
+ "metadata": {},
"outputs": [],
"source": [
"full_joint = JointProbDist(['Cavity', 'Toothache', 'Catch'])\n",
@@ -309,13 +716,119 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "def enumerate_joint ( variables , e , P ): \n",
+ " """Return the sum of those entries in P consistent with e, \n",
+ " provided variables is P's remaining variables (the ones not in e).""" \n",
+ " if not variables : \n",
+ " return P [ e ] \n",
+ " Y , rest = variables [ 0 ], variables [ 1 :] \n",
+ " return sum ([ enumerate_joint ( rest , extend ( e , Y , y ), P ) \n",
+ " for y in P . values ( Y )]) \n",
+ " \n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
"source": [
- "%psource enumerate_joint"
+ "psource(enumerate_joint)"
]
},
{
@@ -327,11 +840,20 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.19999999999999998"
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"evidence = dict(Toothache=True)\n",
"variables = ['Cavity', 'Catch'] # variables not part of evidence\n",
@@ -348,11 +870,20 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.12"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"evidence = dict(Cavity=True, Toothache=True)\n",
"variables = ['Catch'] # variables not part of evidence\n",
@@ -371,11 +902,20 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
+ "execution_count": 19,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.6"
+ ]
+ },
+ "execution_count": 19,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"ans2/ans1"
]
@@ -389,13 +929,125 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
+ "execution_count": 20,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "def enumerate_joint_ask ( X , e , P ): \n",
+ " """Return a probability distribution over the values of the variable X, \n",
+ " given the {var:val} observations e, in the JointProbDist P. [Section 13.3] \n",
+ " >>> P = JointProbDist(['X', 'Y']) \n",
+ " >>> P[0,0] = 0.25; P[0,1] = 0.5; P[1,1] = P[2,1] = 0.125 \n",
+ " >>> enumerate_joint_ask('X', dict(Y=1), P).show_approx() \n",
+ " '0: 0.667, 1: 0.167, 2: 0.167' \n",
+ " """ \n",
+ " assert X not in e , "Query variable must be distinct from evidence" \n",
+ " Q = ProbDist ( X ) # probability distribution for X, initially empty \n",
+ " Y = [ v for v in P . variables if v != X and v not in e ] # hidden variables. \n",
+ " for xi in P . values ( X ): \n",
+ " Q [ xi ] = enumerate_joint ( Y , extend ( e , X , xi ), P ) \n",
+ " return Q . normalize () \n",
+ " \n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
"source": [
- "%psource enumerate_joint_ask"
+ "psource(enumerate_joint_ask)"
]
},
{
@@ -407,11 +1059,20 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
+ "execution_count": 21,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(0.6, 0.39999999999999997)"
+ ]
+ },
+ "execution_count": 21,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"query_variable = 'Cavity'\n",
"evidence = dict(Toothache=True)\n",
@@ -430,7 +1091,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "## Bayesian Networks\n",
+ "## BAYESIAN NETWORKS\n",
"\n",
"A Bayesian network is a representation of the joint probability distribution encoding a collection of conditional independence statements.\n",
"\n",
@@ -441,13 +1102,182 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
+ "execution_count": 22,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "class BayesNode : \n",
+ " """A conditional probability distribution for a boolean variable, \n",
+ " P(X | parents). Part of a BayesNet.""" \n",
+ "\n",
+ " def __init__ ( self , X , parents , cpt ): \n",
+ " """X is a variable name, and parents a sequence of variable \n",
+ " names or a space-separated string. cpt, the conditional \n",
+ " probability table, takes one of these forms: \n",
+ "\n",
+ " * A number, the unconditional probability P(X=true). You can \n",
+ " use this form when there are no parents. \n",
+ "\n",
+ " * A dict {v: p, ...}, the conditional probability distribution \n",
+ " P(X=true | parent=v) = p. When there's just one parent. \n",
+ "\n",
+ " * A dict {(v1, v2, ...): p, ...}, the distribution P(X=true | \n",
+ " parent1=v1, parent2=v2, ...) = p. Each key must have as many \n",
+ " values as there are parents. You can use this form always; \n",
+ " the first two are just conveniences. \n",
+ "\n",
+ " In all cases the probability of X being false is left implicit, \n",
+ " since it follows from P(X=true). \n",
+ "\n",
+ " >>> X = BayesNode('X', '', 0.2) \n",
+ " >>> Y = BayesNode('Y', 'P', {T: 0.2, F: 0.7}) \n",
+ " >>> Z = BayesNode('Z', 'P Q', \n",
+ " ... {(T, T): 0.2, (T, F): 0.3, (F, T): 0.5, (F, F): 0.7}) \n",
+ " """ \n",
+ " if isinstance ( parents , str ): \n",
+ " parents = parents . split () \n",
+ "\n",
+ " # We store the table always in the third form above. \n",
+ " if isinstance ( cpt , ( float , int )): # no parents, 0-tuple \n",
+ " cpt = {(): cpt } \n",
+ " elif isinstance ( cpt , dict ): \n",
+ " # one parent, 1-tuple \n",
+ " if cpt and isinstance ( list ( cpt . keys ())[ 0 ], bool ): \n",
+ " cpt = {( v ,): p for v , p in cpt . items ()} \n",
+ "\n",
+ " assert isinstance ( cpt , dict ) \n",
+ " for vs , p in cpt . items (): \n",
+ " assert isinstance ( vs , tuple ) and len ( vs ) == len ( parents ) \n",
+ " assert all ( isinstance ( v , bool ) for v in vs ) \n",
+ " assert 0 <= p <= 1 \n",
+ "\n",
+ " self . variable = X \n",
+ " self . parents = parents \n",
+ " self . cpt = cpt \n",
+ " self . children = [] \n",
+ "\n",
+ " def p ( self , value , event ): \n",
+ " """Return the conditional probability \n",
+ " P(X=value | parents=parent_values), where parent_values \n",
+ " are the values of parents in event. (event must assign each \n",
+ " parent a value.) \n",
+ " >>> bn = BayesNode('X', 'Burglary', {T: 0.2, F: 0.625}) \n",
+ " >>> bn.p(False, {'Burglary': False, 'Earthquake': True}) \n",
+ " 0.375""" \n",
+ " assert isinstance ( value , bool ) \n",
+ " ptrue = self . cpt [ event_values ( event , self . parents )] \n",
+ " return ptrue if value else 1 - ptrue \n",
+ "\n",
+ " def sample ( self , event ): \n",
+ " """Sample from the distribution for this variable conditioned \n",
+ " on event's values for parent_variables. That is, return True/False \n",
+ " at random according with the conditional probability given the \n",
+ " parents.""" \n",
+ " return probability ( self . p ( True , event )) \n",
+ "\n",
+ " def __repr__ ( self ): \n",
+ " return repr (( self . variable , ' ' . join ( self . parents ))) \n",
+ " \n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
"source": [
- "%psource BayesNode"
+ "psource(BayesNode)"
]
},
{
@@ -465,10 +1295,8 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true
- },
+ "execution_count": 23,
+ "metadata": {},
"outputs": [],
"source": [
"alarm_node = BayesNode('Alarm', ['Burglary', 'Earthquake'], \n",
@@ -484,15 +1312,13 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true
- },
+ "execution_count": 24,
+ "metadata": {},
"outputs": [],
"source": [
"john_node = BayesNode('JohnCalls', ['Alarm'], {True: 0.90, False: 0.05})\n",
"mary_node = BayesNode('MaryCalls', 'Alarm', {(True, ): 0.70, (False, ): 0.01}) # Using string for parents.\n",
- "# Equvivalant to john_node definition. "
+    "# Equivalent to john_node definition."
]
},
{
@@ -504,10 +1330,8 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true
- },
+ "execution_count": 25,
+ "metadata": {},
"outputs": [],
"source": [
"burglary_node = BayesNode('Burglary', '', 0.001)\n",
@@ -523,11 +1347,20 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
+ "execution_count": 26,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.09999999999999998"
+ ]
+ },
+ "execution_count": 26,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"john_node.p(False, {'Alarm': True, 'Burglary': True}) # P(JohnCalls=False | Alarm=True)"
]
@@ -541,13 +1374,148 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
+ "execution_count": 27,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "class BayesNet : \n",
+ " """Bayesian network containing only boolean-variable nodes.""" \n",
+ "\n",
+ " def __init__ ( self , node_specs = None ): \n",
+ " """Nodes must be ordered with parents before children.""" \n",
+ " self . nodes = [] \n",
+ " self . variables = [] \n",
+ " node_specs = node_specs or [] \n",
+ " for node_spec in node_specs : \n",
+ " self . add ( node_spec ) \n",
+ "\n",
+ " def add ( self , node_spec ): \n",
+ " """Add a node to the net. Its parents must already be in the \n",
+ " net, and its variable must not.""" \n",
+ " node = BayesNode ( * node_spec ) \n",
+ " assert node . variable not in self . variables \n",
+ " assert all (( parent in self . variables ) for parent in node . parents ) \n",
+ " self . nodes . append ( node ) \n",
+ " self . variables . append ( node . variable ) \n",
+ " for parent in node . parents : \n",
+ " self . variable_node ( parent ) . children . append ( node ) \n",
+ "\n",
+ " def variable_node ( self , var ): \n",
+ " """Return the node for the variable named var. \n",
+ " >>> burglary.variable_node('Burglary').variable \n",
+ " 'Burglary'""" \n",
+ " for n in self . nodes : \n",
+ " if n . variable == var : \n",
+ " return n \n",
+ " raise Exception ( "No such variable: {}" . format ( var )) \n",
+ "\n",
+ " def variable_values ( self , var ): \n",
+ " """Return the domain of var.""" \n",
+ " return [ True , False ] \n",
+ "\n",
+ " def __repr__ ( self ): \n",
+ " return 'BayesNet({0!r})' . format ( self . nodes ) \n",
+ " \n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
"source": [
- "%psource BayesNet"
+ "psource(BayesNet)"
]
},
{
@@ -572,11 +1540,20 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
+ "execution_count": 28,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "BayesNet([('Burglary', ''), ('Earthquake', ''), ('Alarm', 'Burglary Earthquake'), ('JohnCalls', 'Alarm'), ('MaryCalls', 'Alarm')])"
+ ]
+ },
+ "execution_count": 28,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"burglary"
]
@@ -590,22 +1567,43 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
+ "execution_count": 29,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "probability.BayesNode"
+ ]
+ },
+ "execution_count": 29,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"type(burglary.variable_node('Alarm'))"
]
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
+ "execution_count": 30,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{(True, True): 0.95,\n",
+ " (True, False): 0.94,\n",
+ " (False, True): 0.29,\n",
+ " (False, False): 0.001}"
+ ]
+ },
+ "execution_count": 30,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"burglary.variable_node('Alarm').cpt"
]
@@ -627,20 +1625,132 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
+ "execution_count": 31,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "def enumerate_all ( variables , e , bn ): \n",
+ " """Return the sum of those entries in P(variables | e{others}) \n",
+ " consistent with e, where P is the joint distribution represented \n",
+ " by bn, and e{others} means e restricted to bn's other variables \n",
+ " (the ones other than variables). Parents must precede children in variables.""" \n",
+ " if not variables : \n",
+ " return 1.0 \n",
+ " Y , rest = variables [ 0 ], variables [ 1 :] \n",
+ " Ynode = bn . variable_node ( Y ) \n",
+ " if Y in e : \n",
+ " return Ynode . p ( e [ Y ], e ) * enumerate_all ( rest , e , bn ) \n",
+ " else : \n",
+ " return sum ( Ynode . p ( y , e ) * enumerate_all ( rest , extend ( e , Y , y ), bn ) \n",
+ " for y in bn . variable_values ( Y )) \n",
+ " \n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
"source": [
- "%psource enumerate_all"
+ "psource(enumerate_all)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "**enumerate__all** recursively evaluates a general form of the **Equation 14.4** in the book.\n",
+    "**enumerate_all** recursively evaluates a general form of **Equation 14.4** in the book.\n",
"\n",
"$$\\textbf{P}(X | \\textbf{e}) = α \\textbf{P}(X, \\textbf{e}) = α \\sum_{y} \\textbf{P}(X, \\textbf{e}, \\textbf{y})$$ \n",
"\n",
@@ -651,29 +1761,147 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
+ "execution_count": 32,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "def enumeration_ask ( X , e , bn ): \n",
+ " """Return the conditional probability distribution of variable X \n",
+ " given evidence e, from BayesNet bn. [Figure 14.9] \n",
+ " >>> enumeration_ask('Burglary', dict(JohnCalls=T, MaryCalls=T), burglary \n",
+ " ... ).show_approx() \n",
+ " 'False: 0.716, True: 0.284'""" \n",
+ " assert X not in e , "Query variable must be distinct from evidence" \n",
+ " Q = ProbDist ( X ) \n",
+ " for xi in bn . variable_values ( X ): \n",
+ " Q [ xi ] = enumerate_all ( bn . variables , extend ( e , X , xi ), bn ) \n",
+ " return Q . normalize () \n",
+ " \n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
"source": [
- "%psource enumeration_ask"
+ "psource(enumeration_ask)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Let us solve the problem of finding out **P(Burglary=True | JohnCalls=True, MaryCalls=True)** using the **burglary** network.**enumeration_ask** takes three arguments **X** = variable name, **e** = Evidence (in form a dict like previously explained), **bn** = The Bayes Net to do inference on."
+    "Let us solve the problem of finding out **P(Burglary=True | JohnCalls=True, MaryCalls=True)** using the **burglary** network. **enumeration_ask** takes three arguments: **X** = the query variable name, **e** = the evidence (a dict, in the form explained previously), **bn** = the Bayes net to do inference on."
]
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
+ "execution_count": 33,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.2841718353643929"
+ ]
+ },
+ "execution_count": 33,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"ans_dist = enumeration_ask('Burglary', {'JohnCalls': True, 'MaryCalls': True}, burglary)\n",
"ans_dist[True]"
@@ -699,13 +1927,120 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
+ "execution_count": 34,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "def make_factor ( var , e , bn ): \n",
+ " """Return the factor for var in bn's joint distribution given e. \n",
+ " That is, bn's full joint distribution, projected to accord with e, \n",
+ " is the pointwise product of these factors for bn's variables.""" \n",
+ " node = bn . variable_node ( var ) \n",
+ " variables = [ X for X in [ var ] + node . parents if X not in e ] \n",
+ " cpt = { event_values ( e1 , variables ): node . p ( e1 [ var ], e1 ) \n",
+ " for e1 in all_events ( variables , bn , e )} \n",
+ " return Factor ( variables , cpt ) \n",
+ " \n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
"source": [
- "%psource make_factor"
+ "psource(make_factor)"
]
},
{
@@ -721,13 +2056,120 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
+ "execution_count": 35,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "def all_events ( variables , bn , e ): \n",
+ " """Yield every way of extending e with values for all variables.""" \n",
+ " if not variables : \n",
+ " yield e \n",
+ " else : \n",
+ " X , rest = variables [ 0 ], variables [ 1 :] \n",
+ " for e1 in all_events ( rest , bn , e ): \n",
+ " for x in bn . variable_values ( X ): \n",
+ " yield extend ( e1 , X , x ) \n",
+ " \n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
"source": [
- "%psource all_events"
+ "psource(all_events)"
]
},
{
@@ -741,10 +2183,8 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
+ "execution_count": 36,
+ "metadata": {},
"outputs": [],
"source": [
"f5 = make_factor('MaryCalls', {'JohnCalls': True, 'MaryCalls': True}, burglary)"
@@ -752,33 +2192,60 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
+ "execution_count": 37,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 37,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"f5"
]
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
+ "execution_count": 38,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{(True,): 0.7, (False,): 0.01}"
+ ]
+ },
+ "execution_count": 38,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"f5.cpt"
]
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
+ "execution_count": 39,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['Alarm']"
+ ]
+ },
+ "execution_count": 39,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"f5.variables"
]
@@ -792,10 +2259,8 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true
- },
+ "execution_count": 40,
+ "metadata": {},
"outputs": [],
"source": [
"new_factor = make_factor('MaryCalls', {'Alarm': True}, burglary)"
@@ -803,11 +2268,20 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
+ "execution_count": 41,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{(True,): 0.7, (False,): 0.30000000000000004}"
+ ]
+ },
+ "execution_count": 41,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"new_factor.cpt"
]
@@ -825,13 +2299,117 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
+ "execution_count": 42,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ " def pointwise_product ( self , other , bn ): \n",
+ " """Multiply two factors, combining their variables.""" \n",
+ " variables = list ( set ( self . variables ) | set ( other . variables )) \n",
+ " cpt = { event_values ( e , variables ): self . p ( e ) * other . p ( e ) \n",
+ " for e in all_events ( variables , bn , {})} \n",
+ " return Factor ( variables , cpt ) \n",
+ " \n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
"source": [
- "%psource Factor.pointwise_product"
+ "psource(Factor.pointwise_product)"
]
},
{
@@ -843,13 +2421,113 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
+ "execution_count": 43,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "def pointwise_product ( factors , bn ): \n",
+ " return reduce ( lambda f , g : f . pointwise_product ( g , bn ), factors ) \n",
+ " \n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
"source": [
- "%psource pointwise_product"
+ "psource(pointwise_product)"
]
},
{
@@ -861,13 +2539,118 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
+ "execution_count": 44,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ " def sum_out ( self , var , bn ): \n",
+ " """Make a factor eliminating var by summing over its values.""" \n",
+ " variables = [ X for X in self . variables if X != var ] \n",
+ " cpt = { event_values ( e , variables ): sum ( self . p ( extend ( e , var , val )) \n",
+ " for val in bn . variable_values ( var )) \n",
+ " for e in all_events ( variables , bn , {})} \n",
+ " return Factor ( variables , cpt ) \n",
+ " \n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
"source": [
- "%psource Factor.sum_out"
+ "psource(Factor.sum_out)"
]
},
{
@@ -879,13 +2662,118 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
+ "execution_count": 45,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "def sum_out ( var , factors , bn ): \n",
+ " """Eliminate var from all factors by summing over its values.""" \n",
+ " result , var_factors = [], [] \n",
+ " for f in factors : \n",
+ " ( var_factors if var in f . variables else result ) . append ( f ) \n",
+ " result . append ( pointwise_product ( var_factors , bn ) . sum_out ( var , bn )) \n",
+ " return result \n",
+ " \n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
"source": [
- "%psource sum_out"
+ "psource(sum_out)"
]
},
{
@@ -910,26 +2798,226 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
+ "execution_count": 46,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "def elimination_ask ( X , e , bn ): \n",
+ " """Compute bn's P(X|e) by variable elimination. [Figure 14.11] \n",
+ " >>> elimination_ask('Burglary', dict(JohnCalls=T, MaryCalls=T), burglary \n",
+ " ... ).show_approx() \n",
+ " 'False: 0.716, True: 0.284'""" \n",
+ " assert X not in e , "Query variable must be distinct from evidence" \n",
+ " factors = [] \n",
+ " for var in reversed ( bn . variables ): \n",
+ " factors . append ( make_factor ( var , e , bn )) \n",
+ " if is_hidden ( var , X , e ): \n",
+ " factors = sum_out ( var , factors , bn ) \n",
+ " return pointwise_product ( factors , bn ) . normalize () \n",
+ " \n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
"source": [
- "%psource elimination_ask"
+ "psource(elimination_ask)"
]
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
+ "execution_count": 47,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'False: 0.716, True: 0.284'"
+ ]
+ },
+ "execution_count": 47,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "elimination_ask('Burglary', dict(JohnCalls=True, MaryCalls=True), burglary).show_approx()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Elimination Ask Optimizations\n",
+ "\n",
+    "`elimination_ask` has some critical points to consider, and some optimizations can be performed:\n",
+ "\n",
+ "- **Operation on factors**:\n",
+ "\n",
+    "  The `sum_out` and `pointwise_product` functions used in `elimination_ask` are where the space and time complexity of the variable elimination algorithm arise (AIMA3e pg. 526).\n",
+ "\n",
+    "> The only trick is to notice that any factor that does not depend on the variable to be summed out can be moved outside the summation.\n",
+ "\n",
+ "- **Variable ordering**:\n",
+ "\n",
+    "  Elimination ordering is important: every choice of ordering yields a valid algorithm, but different orderings cause different intermediate factors to be generated during the calculation (AIMA3e pg. 527). In this case the algorithm simply applies a reversed order; a rough sketch of the greedy ordering heuristic quoted below is given after this cell.\n",
+ "\n",
+ "> In general, the time and space requirements of variable elimination are dominated by the size of the largest factor constructed during the operation of the algorithm. This in turn is determined by the order of elimination of variables and by the structure of the network. It turns out to be intractable to determine the optimal ordering, but several good heuristics are available. One fairly effective method is a greedy one: eliminate whichever variable minimizes the size of the next factor to be constructed. \n",
+ "\n",
+ "- **Variable relevance**\n",
+ " \n",
+    "    Some variables can be irrelevant to a query (i.e. they sum out to 1 and do not affect the result). A variable elimination algorithm can therefore remove all such variables before evaluating the query (AIMA3e pg. 528).\n",
+ "\n",
+    "> Every variable that is not an ancestor of a query variable or evidence variable is irrelevant to the query; as an optimization, all such variables can be removed before evaluation."
+ ]
+ },
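+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "As a rough illustration of the greedy 'min-size' ordering heuristic quoted above (this sketch is not part of `probability.py` and uses a made-up helper `greedy_elimination_order`), we repeatedly pick the hidden variable whose elimination creates the smallest new factor. The factor scopes below are the ones `make_factor` builds for the query **P(Burglary | JohnCalls=True, MaryCalls=True)**."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Illustrative sketch of a greedy 'min-size' elimination ordering (boolean variables assumed).\n",
+    "# factor_scopes is a list of sets: the variables appearing in each current factor.\n",
+    "def greedy_elimination_order(hidden_vars, factor_scopes):\n",
+    "    scopes = [set(s) for s in factor_scopes]\n",
+    "    remaining, order = set(hidden_vars), []\n",
+    "\n",
+    "    def scope_after_eliminating(var):\n",
+    "        # variables of the factor produced by summing var out of every factor that mentions it\n",
+    "        return set().union(*(s for s in scopes if var in s)) - {var}\n",
+    "\n",
+    "    while remaining:\n",
+    "        # pick the variable whose elimination creates the smallest factor (2 ** number of variables)\n",
+    "        var = min(remaining, key=lambda v: 2 ** len(scope_after_eliminating(v)))\n",
+    "        scopes = [s for s in scopes if var not in s] + [scope_after_eliminating(var)]\n",
+    "        remaining.remove(var)\n",
+    "        order.append(var)\n",
+    "    return order\n",
+    "\n",
+    "# hidden variables and initial factor scopes for P(Burglary | JohnCalls=True, MaryCalls=True)\n",
+    "greedy_elimination_order({'Earthquake', 'Alarm'},\n",
+    "                         [{'Burglary'}, {'Earthquake'}, {'Alarm', 'Burglary', 'Earthquake'},\n",
+    "                          {'Alarm'}, {'Alarm'}])"
+   ]
+  },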
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Runtime comparison\n",
+ "Let's see how the runtimes of these two algorithms compare.\n",
+ "We expect variable elimination to outperform enumeration by a large margin as we reduce the number of repetitive calculations significantly."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 48,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "105 µs ± 11.9 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n"
+ ]
+ }
+ ],
"source": [
+ "%%timeit\n",
+ "enumeration_ask('Burglary', dict(JohnCalls=True, MaryCalls=True), burglary).show_approx()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 49,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "262 µs ± 54.7 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%timeit\n",
"elimination_ask('Burglary', dict(JohnCalls=True, MaryCalls=True), burglary).show_approx()"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+    "In this test case we observe that variable elimination is actually slower than enumeration. This is mainly because the network is very small (just 5 nodes), so the overhead of constructing and multiplying factors outweighs the savings from avoiding repeated calculations; Python-level overheads also play a part. As discussed above, `elimination_ask` has some critical points, and those optimizations would need to be performed to speed it up.\n",
+ " \n",
+ "Of course, for more complicated networks, variable elimination will be significantly faster and runtime will drop not just by a constant factor, but by a polynomial factor proportional to the number of nodes, due to the reduction in repeated calculations."
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
@@ -941,13 +3029,117 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
+ "execution_count": 50,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ " def sample ( self , event ): \n",
+ " """Sample from the distribution for this variable conditioned \n",
+ " on event's values for parent_variables. That is, return True/False \n",
+ " at random according with the conditional probability given the \n",
+ " parents.""" \n",
+ " return probability ( self . p ( True , event )) \n",
+ " \n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
"source": [
- "%psource BayesNode.sample"
+ "psource(BayesNode.sample)"
]
},
{
@@ -963,13 +3155,118 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
+ "execution_count": 51,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "def prior_sample ( bn ): \n",
+ " """Randomly sample from bn's full joint distribution. The result \n",
+ " is a {variable: value} dict. [Figure 14.13]""" \n",
+ " event = {} \n",
+ " for node in bn . nodes : \n",
+ " event [ node . variable ] = node . sample ( event ) \n",
+ " return event \n",
+ " \n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
"source": [
- "%psource prior_sample"
+ "psource(prior_sample)"
]
},
{
@@ -980,15 +3277,25 @@
"\n",
" \n",
"\n",
- "We store the samples on the observations. Let us find **P(Rain=True)**"
+ "Traversing the graph in topological order is important.\n",
+ "There are two possible topological orderings for this particular directed acyclic graph.\n",
+ " \n",
+ "1. `Cloudy -> Sprinkler -> Rain -> Wet Grass`\n",
+ "2. `Cloudy -> Rain -> Sprinkler -> Wet Grass`\n",
+ " \n",
+ " \n",
+    "We can follow either of these two orderings to sample from the network.\n",
+ "Any ordering other than these two, however, cannot be used.\n",
+ " \n",
+    "One way to think about this is that `Cloudy` can be seen as a precondition of both `Rain` and `Sprinkler`; just as we have seen in planning, preconditions need to be satisfied before an action can be executed.\n",
+ " \n",
+    "We store the samples as a list of observations. Let us find **P(Rain=True)** by taking 1000 random samples from the network."
]
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
+ "execution_count": 52,
+ "metadata": {},
"outputs": [],
"source": [
"N = 1000\n",
@@ -1004,10 +3311,8 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true
- },
+ "execution_count": 53,
+ "metadata": {},
"outputs": [],
"source": [
"rain_true = [observation for observation in all_observations if observation['Rain'] == True]"
@@ -1022,11 +3327,17 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
+ "execution_count": 54,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "0.503\n"
+ ]
+ }
+ ],
"source": [
"answer = len(rain_true) / N\n",
"print(answer)"
@@ -1036,16 +3347,50 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "To evaluate a conditional distribution. We can use a two-step filtering process. We first separate out the variables that are consistent with the evidence. Then for each value of query variable, we can find probabilities. For example to find **P(Cloudy=True | Rain=True)**. We have already filtered out the values consistent with our evidence in **rain_true**. Now we apply a second filtering step on **rain_true** to find **P(Rain=True and Cloudy=True)**"
+    "Sampling again might give slightly different results, as we have no control over the distribution of the random samples."
]
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
+ "execution_count": 55,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "0.519\n"
+ ]
+ }
+ ],
+ "source": [
+ "N = 1000\n",
+ "all_observations = [prior_sample(sprinkler) for x in range(N)]\n",
+ "rain_true = [observation for observation in all_observations if observation['Rain'] == True]\n",
+ "answer = len(rain_true) / N\n",
+ "print(answer)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+    "To evaluate a conditional distribution we can use a two-step filtering process. We first separate out the samples that are consistent with the evidence. Then, for each value of the query variable, we can find probabilities. For example, to find **P(Cloudy=True | Rain=True)**: we have already filtered out the samples consistent with our evidence in **rain_true**, so now we apply a second filtering step on **rain_true** to find **P(Rain=True and Cloudy=True)**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 56,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "0.8265895953757225\n"
+ ]
+ }
+ ],
"source": [
"rain_and_cloudy = [observation for observation in rain_true if observation['Cloudy'] == True]\n",
"answer = len(rain_and_cloudy) / len(rain_true)\n",
@@ -1058,18 +3403,156 @@
"source": [
"### Rejection Sampling\n",
"\n",
- "Rejection Sampling is based on an idea similar to what we did just now. First, it generates samples from the prior distribution specified by the network. Then, it rejects all those that do not match the evidence. The function **rejection_sampling** implements the algorithm described by **Figure 14.14**"
+ "Rejection Sampling is based on an idea similar to what we did just now. \n",
+ "First, it generates samples from the prior distribution specified by the network. \n",
+ "Then, it rejects all those that do not match the evidence. \n",
+ " \n",
+ "Rejection sampling is advantageous only when we know the query beforehand.\n",
+ "While prior sampling generally works for any query, it might fail in some scenarios.\n",
+ " \n",
+ "Let's say we have a generic Bayesian network and we have evidence `e`, and we want to know how many times a state `A` is true, given evidence `e` is true.\n",
+ "Normally, prior sampling can answer this question, but let's assume that the probability of evidence `e` being true in our actual probability distribution is very small.\n",
+ "In this situation, it might be possible that sampling never encounters a data-point where `e` is true.\n",
+ "If our sampled data has no instance of `e` being true, `P(e) = 0`, and therefore `P(A | e) / P(e) = 0/0`, which is undefined.\n",
+ "We cannot find the required value using this sample.\n",
+ " \n",
+ "We can definitely increase the number of sample points, but we can never guarantee that we will encounter the case where `e` is non-zero (assuming our actual probability distribution has atleast one case where `e` is true).\n",
+ "To guarantee this, we would have to consider every single data point, which means we lose the speed advantage that approximation provides us and we essentially have to calculate the exact inference model of the Bayesian network.\n",
+ " \n",
+ " \n",
+ "Rejection sampling will be useful in this situation, as we already know the query.\n",
+ " \n",
+ "While sampling from the network, we will reject any sample which is inconsistent with the evidence variables of the given query (in this example, the only evidence variable is `e`).\n",
+ "We will only consider samples that do not violate **any** of the evidence variables.\n",
+ "In this way, we will have enough data with the required evidence to infer queries involving a subset of that evidence.\n",
+ " \n",
+ " \n",
+ "The function **rejection_sampling** implements the algorithm described by **Figure 14.14**"
]
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
+ "execution_count": 57,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ " Codestin Search App \n",
+ " \n",
+ " \n",
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "def rejection_sampling ( X , e , bn , N = 10000 ): \n",
+ " """Estimate the probability distribution of variable X given \n",
+ " evidence e in BayesNet bn, using N samples. [Figure 14.14] \n",
+ " Raises a ZeroDivisionError if all the N samples are rejected, \n",
+ " i.e., inconsistent with e. \n",
+ " >>> random.seed(47) \n",
+ " >>> rejection_sampling('Burglary', dict(JohnCalls=T, MaryCalls=T), \n",
+ " ... burglary, 10000).show_approx() \n",
+ " 'False: 0.7, True: 0.3' \n",
+ " """ \n",
+ " counts = { x : 0 for x in bn . variable_values ( X )} # bold N in [Figure 14.14] \n",
+ " for j in range ( N ): \n",
+ " sample = prior_sample ( bn ) # boldface x in [Figure 14.14] \n",
+ " if consistent_with ( sample , e ): \n",
+ " counts [ sample [ X ]] += 1 \n",
+ " return ProbDist ( X , counts ) \n",
+ " \n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
"source": [
- "%psource rejection_sampling"
+ "psource(rejection_sampling)"
]
},
{
@@ -1083,13 +3566,115 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
+ "execution_count": 58,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ " Codestin Search App \n",
+ " \n",
+ " \n",
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "def consistent_with ( event , evidence ): \n",
+ " """Is event consistent with the given evidence?""" \n",
+ " return all ( evidence . get ( k , v ) == v \n",
+ " for k , v in event . items ()) \n",
+ " \n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
"source": [
- "%psource consistent_with"
+ "psource(consistent_with)"
]
},
{
@@ -1101,11 +3686,20 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
+ "execution_count": 59,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.8035019455252919"
+ ]
+ },
+ "execution_count": 59,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"p = rejection_sampling('Cloudy', dict(Rain=True), sprinkler, 1000)\n",
"p[True]"
@@ -1117,6 +3711,7 @@
"source": [
"### Likelihood Weighting\n",
"\n",
+ "Rejection sampling takes a long time to run when the probability of finding consistent evidence is low. It is also slow for larger networks and more evidence variables.\n",
"Rejection sampling tends to reject a lot of samples if our evidence consists of a large number of variables. Likelihood Weighting solves this by fixing the evidence (i.e. not sampling it) and then using weights to make sure that our overall sampling is still consistent.\n",
"\n",
"The pseudocode in **Figure 14.15** is implemented as **likelihood_weighting** and **weighted_sample**."
@@ -1124,13 +3719,124 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
+ "execution_count": 60,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ " Codestin Search App \n",
+ " \n",
+ " \n",
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "def weighted_sample ( bn , e ): \n",
+ " """Sample an event from bn that's consistent with the evidence e; \n",
+ " return the event and its weight, the likelihood that the event \n",
+ " accords to the evidence.""" \n",
+ " w = 1 \n",
+ " event = dict ( e ) # boldface x in [Figure 14.15] \n",
+ " for node in bn . nodes : \n",
+ " Xi = node . variable \n",
+ " if Xi in e : \n",
+ " w *= node . p ( e [ Xi ], event ) \n",
+ " else : \n",
+ " event [ Xi ] = node . sample ( event ) \n",
+ " return event , w \n",
+ " \n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
"source": [
- "%psource weighted_sample"
+ "psource(weighted_sample)"
]
},
{
@@ -1145,24 +3851,144 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
+ "execution_count": 61,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "({'Rain': True, 'Cloudy': False, 'Sprinkler': True, 'WetGrass': True}, 0.2)"
+ ]
+ },
+ "execution_count": 61,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"weighted_sample(sprinkler, dict(Rain=True))"
]
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
+ "execution_count": 62,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ " Codestin Search App \n",
+ " \n",
+ " \n",
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "def likelihood_weighting ( X , e , bn , N = 10000 ): \n",
+ " """Estimate the probability distribution of variable X given \n",
+ " evidence e in BayesNet bn. [Figure 14.15] \n",
+ " >>> random.seed(1017) \n",
+ " >>> likelihood_weighting('Burglary', dict(JohnCalls=T, MaryCalls=T), \n",
+ " ... burglary, 10000).show_approx() \n",
+ " 'False: 0.702, True: 0.298' \n",
+ " """ \n",
+ " W = { x : 0 for x in bn . variable_values ( X )} \n",
+ " for j in range ( N ): \n",
+ " sample , weight = weighted_sample ( bn , e ) # boldface x, w in [Figure 14.15] \n",
+ " W [ sample [ X ]] += weight \n",
+ " return ProbDist ( X , W ) \n",
+ " \n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
"source": [
- "%psource likelihood_weighting"
+ "psource(likelihood_weighting)"
]
},
{
@@ -1174,11 +4000,20 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
+ "execution_count": 63,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'False: 0.2, True: 0.8'"
+ ]
+ },
+ "execution_count": 63,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"likelihood_weighting('Cloudy', dict(Rain=True), sprinkler, 200).show_approx()"
]
@@ -1196,13 +4031,124 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
+ "execution_count": 64,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ " Codestin Search App \n",
+ " \n",
+ " \n",
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "def gibbs_ask ( X , e , bn , N = 1000 ): \n",
+ " """[Figure 14.16]""" \n",
+ " assert X not in e , "Query variable must be distinct from evidence" \n",
+ " counts = { x : 0 for x in bn . variable_values ( X )} # bold N in [Figure 14.16] \n",
+ " Z = [ var for var in bn . variables if var not in e ] \n",
+ " state = dict ( e ) # boldface x in [Figure 14.16] \n",
+ " for Zi in Z : \n",
+ " state [ Zi ] = random . choice ( bn . variable_values ( Zi )) \n",
+ " for j in range ( N ): \n",
+ " for Zi in Z : \n",
+ " state [ Zi ] = markov_blanket_sample ( Zi , state , bn ) \n",
+ " counts [ state [ X ]] += 1 \n",
+ " return ProbDist ( X , counts ) \n",
+ " \n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
"source": [
- "%psource gibbs_ask"
+ "psource(gibbs_ask)"
]
},
{
@@ -1214,39 +4160,2377 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
+ "execution_count": 65,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'False: 0.215, True: 0.785'"
+ ]
+ },
+ "execution_count": 65,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"gibbs_ask('Cloudy', dict(Rain=True), sprinkler, 200).show_approx()"
]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
},
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.4.3"
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Runtime analysis\n",
+ "Let's take a look at how much time each algorithm takes."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 66,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "13.2 ms ± 3.45 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%timeit\n",
+ "all_observations = [prior_sample(sprinkler) for x in range(1000)]\n",
+ "rain_true = [observation for observation in all_observations if observation['Rain'] == True]\n",
+ "len([observation for observation in rain_true if observation['Cloudy'] == True]) / len(rain_true)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 67,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "11 ms ± 687 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%timeit\n",
+ "rejection_sampling('Cloudy', dict(Rain=True), sprinkler, 1000)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 68,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "2.12 ms ± 554 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%timeit\n",
+ "likelihood_weighting('Cloudy', dict(Rain=True), sprinkler, 200)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 69,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "14.4 ms ± 2.16 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%timeit\n",
+ "gibbs_ask('Cloudy', dict(Rain=True), sprinkler, 200)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As expected, all algorithms have a very similar runtime.\n",
+ "However, rejection sampling would be a lot faster and more accurate when the probabiliy of finding data-points consistent with the required evidence is small.\n",
+ " \n",
+ "Likelihood weighting is the fastest out of all as it doesn't involve rejecting samples, but also has a quite high variance."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## HIDDEN MARKOV MODELS"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Often, we need to carry out probabilistic inference on temporal data or a sequence of observations where the order of observations matter.\n",
+ "We require a model similar to a Bayesian Network, but one that grows over time to keep up with the latest evidences.\n",
+ "If you are familiar with the `mdp` module or Markov models in general, you can probably guess that a Markov model might come close to representing our problem accurately.\n",
+ " \n",
+ "A Markov model is basically a chain-structured Bayesian Network in which there is one state for each time step and each node has an identical probability distribution.\n",
+ "The first node, however, has a different distribution, called the prior distribution which models the initial state of the process.\n",
+ "A state in a Markov model depends only on the previous state and the latest evidence and not on the states before it.\n",
+ " \n",
+ "A **Hidden Markov Model** or **HMM** is a special case of a Markov model in which the state of the process is described by a single discrete random variable.\n",
+ "The possible values of the variable are the possible states of the world.\n",
+ " \n",
+ "But what if we want to model a process with two or more state variables?\n",
+ "In that case, we can still fit the process into the HMM framework by redefining our state variables as a single \"megavariable\".\n",
+ "We do this because carrying out inference on HMMs have standard optimized algorithms.\n",
+ "A HMM is very similar to an MDP, but we don't have the option of taking actions like in MDPs, instead, the process carries on as new evidence appears.\n",
+ " \n",
+ "If a HMM is truncated at a fixed length, it becomes a Bayesian network and general BN inference can be used on it to answer queries.\n",
+ "\n",
+ "Before we start, it will be helpful to understand the structure of a temporal model. We will use the example of the book with the guard and the umbrella. In this example, the state $\\textbf{X}$ is whether it is a rainy day (`X = True`) or not (`X = False`) at Day $\\textbf{t}$. In the sensor or observation model, the observation or evidence $\\textbf{U}$ is whether the professor holds an umbrella (`U = True`) or not (`U = False`) on **Day** $\\textbf{t}$. Based on that, the transition model is \n",
+ "\n",
+ "| $X_{t-1}$ | $X_{t}$ | **P**$(X_{t}| X_{t-1})$| \n",
+ "| ------------- |------------- | ----------------------------------|\n",
+ "| ***${False}$*** | ***${False}$*** | 0.7 |\n",
+ "| ***${False}$*** | ***${True}$*** | 0.3 |\n",
+ "| ***${True}$*** | ***${False}$*** | 0.3 |\n",
+ "| ***${True}$*** | ***${True}$*** | 0.7 |\n",
+ "\n",
+ "And the the sensor model will be,\n",
+ "\n",
+ "| $X_{t}$ | $U_{t}$ | **P**$(U_{t}|X_{t})$| \n",
+ "| :-------------: |:-------------: | :------------------------:|\n",
+ "| ***${False}$*** | ***${True}$*** | 0.2 |\n",
+ "| ***${False}$*** | ***${False}$*** | 0.8 |\n",
+ "| ***${True}$*** | ***${True}$*** | 0.9 |\n",
+ "| ***${True}$*** | ***${False}$*** | 0.1 |\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "HMMs are implemented in the **`HiddenMarkovModel`** class.\n",
+ "Let's have a look."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 70,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ " Codestin Search App \n",
+ " \n",
+ " \n",
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "class HiddenMarkovModel : \n",
+ " """A Hidden markov model which takes Transition model and Sensor model as inputs""" \n",
+ "\n",
+ " def __init__ ( self , transition_model , sensor_model , prior = None ): \n",
+ " self . transition_model = transition_model \n",
+ " self . sensor_model = sensor_model \n",
+ " self . prior = prior or [ 0.5 , 0.5 ] \n",
+ "\n",
+ " def sensor_dist ( self , ev ): \n",
+ " if ev is True : \n",
+ " return self . sensor_model [ 0 ] \n",
+ " else : \n",
+ " return self . sensor_model [ 1 ] \n",
+ " \n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "psource(HiddenMarkovModel)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We instantiate the object **`hmm`** of the class using a list of lists for both the transition and the sensor model."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 71,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "umbrella_transition_model = [[0.7, 0.3], [0.3, 0.7]]\n",
+ "umbrella_sensor_model = [[0.9, 0.2], [0.1, 0.8]]\n",
+ "hmm = HiddenMarkovModel(umbrella_transition_model, umbrella_sensor_model)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The **`sensor_dist()`** method returns a list with the conditional probabilities of the sensor model."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 72,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[0.9, 0.2]"
+ ]
+ },
+ "execution_count": 72,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "hmm.sensor_dist(ev=True)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now that we have defined an HMM object, our task here is to compute the belief $B_{t}(x)= P(X_{t}|U_{1:t})$ given evidence **U** at each time step **t**.\n",
+ " \n",
+ "The basic inference tasks that must be solved are:\n",
+ "1. **Filtering**: Computing the posterior probability distribution over the most recent state, given all the evidence up to the current time step.\n",
+ "2. **Prediction**: Computing the posterior probability distribution over the future state.\n",
+ "3. **Smoothing**: Computing the posterior probability distribution over a past state. Smoothing provides a better estimation as it incorporates more evidence.\n",
+ "4. **Most likely explanation**: Finding the most likely sequence of states for a given observation\n",
+ "5. **Learning**: The transition and sensor models can be learnt, if not yet known, just like in an information gathering agent\n",
+ " \n",
+ " \n",
+ "\n",
+ "There are three primary methods to carry out inference in Hidden Markov Models:\n",
+ "1. The Forward-Backward algorithm\n",
+ "2. Fixed lag smoothing\n",
+ "3. Particle filtering\n",
+ "\n",
+ "Let's have a look at how we can carry out inference and answer queries based on our umbrella HMM using these algorithms."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### FORWARD-BACKWARD\n",
+ "This is a general algorithm that works for all Markov models, not just HMMs.\n",
+ "In the filtering task (inference) we are given evidence **U** in each time **t** and we want to compute the belief $B_{t}(x)= P(X_{t}|U_{1:t})$. \n",
+ "We can think of it as a three step process:\n",
+ "1. In every step we start with the current belief $P(X_{t}|e_{1:t})$\n",
+ "2. We update it for time\n",
+ "3. We update it for evidence\n",
+ "\n",
+ "The forward algorithm performs the step 2 and 3 at once. It updates, or better say reweights, the initial belief using the transition and the sensor model. Let's see the umbrella example. On **Day 0** no observation is available, and for that reason we will assume that we have equal possibilities to rain or not. In the **`HiddenMarkovModel`** class, the prior probabilities for **Day 0** are by default [0.5, 0.5]. "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The observation update is calculated with the **`forward()`** function. Basically, we update our belief using the observation model. The function returns a list with the probabilities of **raining or not** on **Day 1**."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 73,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ " Codestin Search App \n",
+ " \n",
+ " \n",
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "def forward ( HMM , fv , ev ): \n",
+ " prediction = vector_add ( scalar_vector_product ( fv [ 0 ], HMM . transition_model [ 0 ]), \n",
+ " scalar_vector_product ( fv [ 1 ], HMM . transition_model [ 1 ])) \n",
+ " sensor_dist = HMM . sensor_dist ( ev ) \n",
+ "\n",
+ " return normalize ( element_wise_product ( sensor_dist , prediction )) \n",
+ " \n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "psource(forward)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 74,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "The probability of raining on day 1 is 0.82\n"
+ ]
+ }
+ ],
+ "source": [
+ "umbrella_prior = [0.5, 0.5]\n",
+ "belief_day_1 = forward(hmm, umbrella_prior, ev=True)\n",
+ "print ('The probability of raining on day 1 is {:.2f}'.format(belief_day_1[0]))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "In **Day 2** our initial belief is the updated belief of **Day 1**.\n",
+ "Again using the **`forward()`** function we can compute the probability of raining in **Day 2**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 75,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "The probability of raining in day 2 is 0.88\n"
+ ]
+ }
+ ],
+ "source": [
+ "belief_day_2 = forward(hmm, belief_day_1, ev=True)\n",
+ "print ('The probability of raining in day 2 is {:.2f}'.format(belief_day_2[0]))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "In the smoothing part we are interested in computing the distribution over past states given evidence up to the present. Assume that we want to compute the distribution for the time **k**, for $0\\leq k\n",
+ "\n",
+ "\n",
+ " Codestin Search App \n",
+ " \n",
+ " \n",
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "def backward ( HMM , b , ev ): \n",
+ " sensor_dist = HMM . sensor_dist ( ev ) \n",
+ " prediction = element_wise_product ( sensor_dist , b ) \n",
+ "\n",
+ " return normalize ( vector_add ( scalar_vector_product ( prediction [ 0 ], HMM . transition_model [ 0 ]), \n",
+ " scalar_vector_product ( prediction [ 1 ], HMM . transition_model [ 1 ]))) \n",
+ " \n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "psource(backward)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 77,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[0.6272727272727272, 0.37272727272727274]"
+ ]
+ },
+ "execution_count": 77,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "b = [1, 1]\n",
+ "backward(hmm, b, ev=True)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Some may notice that the result is not the same as in the book. The main reason is that in the book the normalization step is not used. If we want to normalize the result, one can use the **`normalize()`** helper function.\n",
+ "\n",
+ "In order to find the smoothed estimate for raining in **Day k**, we will use the **`forward_backward()`** function. As in the example in the book, the umbrella is observed in both days and the prior distribution is [0.5, 0.5]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 78,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/markdown": [
+ "### AIMA3e\n",
+ "__function__ FORWARD-BACKWARD(__ev__, _prior_) __returns__ a vector of probability distributions \n",
+ " __inputs__: __ev__, a vector of evidence values for steps 1,…,_t_ \n",
+ " _prior_, the prior distribution on the initial state, __P__(__X__0 ) \n",
+ " __local variables__: __fv__, a vector of forward messages for steps 0,…,_t_ \n",
+ " __b__, a representation of the backward message, initially all 1s \n",
+ " __sv__, a vector of smoothed estimates for steps 1,…,_t_ \n",
+ "\n",
+ " __fv__\\[0\\] ← _prior_ \n",
+ " __for__ _i_ = 1 __to__ _t_ __do__ \n",
+ " __fv__\\[_i_\\] ← FORWARD(__fv__\\[_i_ − 1\\], __ev__\\[_i_\\]) \n",
+ " __for__ _i_ = _t_ __downto__ 1 __do__ \n",
+ " __sv__\\[_i_\\] ← NORMALIZE(__fv__\\[_i_\\] × __b__) \n",
+ " __b__ ← BACKWARD(__b__, __ev__\\[_i_\\]) \n",
+ " __return__ __sv__\n",
+ "\n",
+ "---\n",
+ "__Figure ??__ The forward\\-backward algorithm for smoothing: computing posterior probabilities of a sequence of states given a sequence of observations. The FORWARD and BACKWARD operators are defined by Equations (__??__) and (__??__), respectively."
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 78,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pseudocode('Forward-Backward')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 79,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "The probability of raining in Day 0 is 0.65 and in Day 1 is 0.88\n"
+ ]
+ }
+ ],
+ "source": [
+ "umbrella_prior = [0.5, 0.5]\n",
+ "prob = forward_backward(hmm, ev=[T, T], prior=umbrella_prior)\n",
+ "print ('The probability of raining in Day 0 is {:.2f} and in Day 1 is {:.2f}'.format(prob[0][0], prob[1][0]))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "\n",
+ "Since HMMs are represented as single variable systems, we can represent the transition model and sensor model as matrices.\n",
+ "The `forward_backward` algorithm can be easily carried out on this representation (as we have done here) with a time complexity of $O({S}^{2} t)$ where t is the length of the sequence and each step multiplies a vector of size $S$ with a matrix of dimensions $SxS$.\n",
+ " \n",
+ "Additionally, the forward pass stores $t$ vectors of size $S$ which makes the auxiliary space requirement equivalent to $O(St)$.\n",
+ " \n",
+ " \n",
+ "Is there any way we can improve the time or space complexity?\n",
+ " \n",
+ "Fortunately, the matrix representation of HMM properties allows us to do so.\n",
+ " \n",
+ "If $f$ and $b$ represent the forward and backward messages respectively, we can modify the smoothing algorithm by first\n",
+ "running the standard forward pass to compute $f_{t:t}$ (forgetting all the intermediate results) and then running\n",
+ "backward pass for both $b$ and $f$ together, using them to compute the smoothed estimate at each step.\n",
+ "This optimization reduces auxlilary space requirement to constant (irrespective of the length of the sequence) provided\n",
+ "the transition matrix is invertible and the sensor model has no zeros (which is sometimes hard to accomplish)\n",
+ " \n",
+ " \n",
+ "Let's look at another algorithm, that carries out smoothing in a more optimized way."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### FIXED LAG SMOOTHING\n",
+ "The matrix formulation allows to optimize online smoothing with a fixed lag.\n",
+ " \n",
+ "Since smoothing can be done in constant, there should exist an algorithm whose time complexity is independent of the length of the lag.\n",
+ "For smoothing a time slice $t - d$ where $d$ is the lag, we need to compute $\\alpha f_{1:t-d}$ x $b_{t-d+1:t}$ incrementally.\n",
+ " \n",
+ "As we already know, the forward equation is\n",
+ " \n",
+ "$$f_{1:t+1} = \\alpha O_{t+1}{T}^{T}f_{1:t}$$\n",
+ " \n",
+ "and the backward equation is\n",
+ " \n",
+ "$$b_{k+1:t} = TO_{k+1}b_{k+2:t}$$\n",
+ " \n",
+ "where $T$ and $O$ are the transition and sensor models respectively.\n",
+ " \n",
+ "For smoothing, the forward message is easy to compute but there exists no simple relation between the backward message of this time step and the one at the previous time step, hence we apply the backward equation $d$ times to get\n",
+ " \n",
+ "$$b_{t-d+1:t} = \\left ( \\prod_{i=t-d+1}^{t}{TO_i} \\right )b_{t+1:t} = B_{t-d+1:t}1$$\n",
+ " \n",
+ "where $B_{t-d+1:t}$ is the product of the sequence of $T$ and $O$ matrices.\n",
+ " \n",
+ "Here's how the `probability` module implements `fixed_lag_smoothing`.\n",
+ " "
+ ]
},
- "widgets": {
- "state": {},
- "version": "1.1.1"
+ {
+ "cell_type": "code",
+ "execution_count": 80,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ " Codestin Search App \n",
+ " \n",
+ " \n",
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "def fixed_lag_smoothing ( e_t , HMM , d , ev , t ): \n",
+ " """[Figure 15.6] \n",
+ " Smoothing algorithm with a fixed time lag of 'd' steps. \n",
+ " Online algorithm that outputs the new smoothed estimate if observation \n",
+ " for new time step is given.""" \n",
+ " ev . insert ( 0 , None ) \n",
+ "\n",
+ " T_model = HMM . transition_model \n",
+ " f = HMM . prior \n",
+ " B = [[ 1 , 0 ], [ 0 , 1 ]] \n",
+ " evidence = [] \n",
+ "\n",
+ " evidence . append ( e_t ) \n",
+ " O_t = vector_to_diagonal ( HMM . sensor_dist ( e_t )) \n",
+ " if t > d : \n",
+ " f = forward ( HMM , f , e_t ) \n",
+ " O_tmd = vector_to_diagonal ( HMM . sensor_dist ( ev [ t - d ])) \n",
+ " B = matrix_multiplication ( inverse_matrix ( O_tmd ), inverse_matrix ( T_model ), B , T_model , O_t ) \n",
+ " else : \n",
+ " B = matrix_multiplication ( B , T_model , O_t ) \n",
+ " t += 1 \n",
+ "\n",
+ " if t > d : \n",
+ " # always returns a 1x2 matrix \n",
+ " return [ normalize ( i ) for i in matrix_multiplication ([ f ], B )][ 0 ] \n",
+ " else : \n",
+ " return None \n",
+ " \n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "psource(fixed_lag_smoothing)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This algorithm applies `forward` as usual and optimizes the smoothing step by using the equations above.\n",
+ "This optimization could be achieved only because HMM properties can be represented as matrices.\n",
+ " \n",
+ "`vector_to_diagonal`, `matrix_multiplication` and `inverse_matrix` are matrix manipulation functions to simplify the implementation.\n",
+ " \n",
+ "`normalize` is used to normalize the output before returning it."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Here's how we can use `fixed_lag_smoothing` for inference on our umbrella HMM."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 81,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "umbrella_transition_model = [[0.7, 0.3], [0.3, 0.7]]\n",
+ "umbrella_sensor_model = [[0.9, 0.2], [0.1, 0.8]]\n",
+ "hmm = HiddenMarkovModel(umbrella_transition_model, umbrella_sensor_model)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Given evidence T, F, T, F and T, we want to calculate the probability distribution for the fourth day with a fixed lag of 2 days.\n",
+ " \n",
+ "Let `e_t = False`"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 82,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[0.1111111111111111, 0.8888888888888888]"
+ ]
+ },
+ "execution_count": 82,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "e_t = F\n",
+ "evidence = [T, F, T, F, T]\n",
+ "fixed_lag_smoothing(e_t, hmm, d=2, ev=evidence, t=4)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 83,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[0.9938650306748466, 0.006134969325153394]"
+ ]
+ },
+ "execution_count": 83,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "e_t = T\n",
+ "evidence = [T, T, F, T, T]\n",
+ "fixed_lag_smoothing(e_t, hmm, d=1, ev=evidence, t=4)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We cannot calculate probability distributions when $t$ is less than $d$"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 84,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fixed_lag_smoothing(e_t, hmm, d=5, ev=evidence, t=4)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As expected, the output is `None`"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### PARTICLE FILTERING\n",
+ "The filtering problem is too expensive to solve using the previous methods for problems with large or continuous state spaces.\n",
+ "Particle filtering is a method that can solve the same problem but when the state space is a lot larger, where we wouldn't be able to do these computations in a reasonable amount of time as fast, as time goes by, and we want to keep track of things as they happen.\n",
+ " \n",
+ "The downside is that it is a sampling method and hence isn't accurate, but the more samples we're willing to take, the more accurate we'd get.\n",
+ " \n",
+ "In this method, instead of keping track of the probability distribution, we will drop particles in a similar proportion at the required regions.\n",
+ "The internal representation of this distribution is usually a list of particles with coordinates in the state-space.\n",
+ "A particle is just a new name for a sample.\n",
+ "\n",
+ "Particle filtering can be divided into four steps:\n",
+ "1. __Initialization__: \n",
+ "If we have some idea about the prior probability distribution, we drop the initial particles accordingly, or else we just drop them uniformly over the state space.\n",
+ "\n",
+ "2. __Forward pass__: \n",
+ "As time goes by and measurements come in, we are going to move the selected particles into the grid squares that makes the most sense in terms of representing the distribution that we are trying to track.\n",
+ "When time goes by, we just loop through all our particles and try to simulate what could happen to each one of them by sampling its next position from the transition model.\n",
+ "This is like prior sampling - samples' frequencies reflect the transition probabilities.\n",
+ "If we have enough samples we are pretty close to exact values.\n",
+ "We work through the list of particles, one particle at a time, all we do is stochastically simulate what the outcome might be.\n",
+ "If we had no dimension of time, and we had no new measurements come in, this would be exactly the same as what we did in prior sampling.\n",
+ "\n",
+ "3. __Reweight__:\n",
+ "As observations come in, don't sample the observations, fix them and downweight the samples based on the evidence just like in likelihood weighting.\n",
+ "$$w(x) = P(e/x)$$\n",
+ "$$B(X) \\propto P(e/X)B'(X)$$\n",
+ " \n",
+ "As before, the probabilities don't sum to one, since most have been downweighted.\n",
+ "They sum to an approximation of $P(e)$.\n",
+ "To normalize the resulting distribution, we can divide by $P(e)$\n",
+ " \n",
+ "Likelihood weighting wasn't the best thing for Bayesian networks, because we were not accounting for the incoming evidence so we were getting samples from the prior distribution, in some sense not the right distribution, so we might end up with a lot of particles with low weights. \n",
+ "These samples were very uninformative and the way we fixed it then was by using __Gibbs sampling__.\n",
+ "Theoretically, Gibbs sampling can be run on a HMM, but as we iterated over the process infinitely many times in a Bayesian network, we cannot do that here as we have new incoming evidence and we also need computational cycles to propagate through time.\n",
+ " \n",
+ "A lot of samples with very low weight and they are not representative of the _actual probability distribution_.\n",
+ "So if we keep running likelihood weighting, we keep propagating the samples with smaller weights and carry out computations for that even though these samples have no significant contribution to the actual probability distribution.\n",
+ "Which is why we require this last step.\n",
+ "\n",
+ "4. __Resample__:\n",
+ "Rather than tracking weighted samples, we _resample_.\n",
+ "We choose from our weighted sample distribution as many times as the number of particles we initially had and we replace these particles too, so that we have a constant number of particles.\n",
+ "This is equivalent to renormalizing the distribution.\n",
+ "The samples with low weight are rarely chosen in the new distribution after resampling.\n",
+ "This newer set of particles after resampling is in some sense more representative of the actual distribution and so we are better allocating our computational cycles.\n",
+ "Now the update is complete for this time step, continue with the next one.\n",
+ "\n",
+ " \n",
+ "Let's see how this is implemented in the module."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 85,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ " Codestin Search App \n",
+ " \n",
+ " \n",
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "def particle_filtering ( e , N , HMM ): \n",
+ " """Particle filtering considering two states variables.""" \n",
+ " dist = [ 0.5 , 0.5 ] \n",
+ " # Weight Initialization \n",
+ " w = [ 0 for _ in range ( N )] \n",
+ " # STEP 1 \n",
+ " # Propagate one step using transition model given prior state \n",
+ " dist = vector_add ( scalar_vector_product ( dist [ 0 ], HMM . transition_model [ 0 ]), \n",
+ " scalar_vector_product ( dist [ 1 ], HMM . transition_model [ 1 ])) \n",
+ " # Assign state according to probability \n",
+ " s = [ 'A' if probability ( dist [ 0 ]) else 'B' for _ in range ( N )] \n",
+ " w_tot = 0 \n",
+ " # Calculate importance weight given evidence e \n",
+ " for i in range ( N ): \n",
+ " if s [ i ] == 'A' : \n",
+ " # P(U|A)*P(A) \n",
+ " w_i = HMM . sensor_dist ( e )[ 0 ] * dist [ 0 ] \n",
+ " if s [ i ] == 'B' : \n",
+ " # P(U|B)*P(B) \n",
+ " w_i = HMM . sensor_dist ( e )[ 1 ] * dist [ 1 ] \n",
+ " w [ i ] = w_i \n",
+ " w_tot += w_i \n",
+ "\n",
+ " # Normalize all the weights \n",
+ " for i in range ( N ): \n",
+ " w [ i ] = w [ i ] / w_tot \n",
+ "\n",
+ " # Limit weights to 4 digits \n",
+ " for i in range ( N ): \n",
+ " w [ i ] = float ( "{0:.4f}" . format ( w [ i ])) \n",
+ "\n",
+ " # STEP 2 \n",
+ "\n",
+ " s = weighted_sample_with_replacement ( N , s , w ) \n",
+ "\n",
+ " return s \n",
+ " \n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "psource(particle_filtering)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Here, `scalar_vector_product` and `vector_add` are helper functions to help with vector math and `weighted_sample_with_replacement` resamples from a weighted sample and replaces the original sample, as is obvious from the name.\n",
+ " \n",
+ "This implementation considers two state variables with generic names 'A' and 'B'.\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Here's how we can use `particle_filtering` on our umbrella HMM, though it doesn't make much sense using particle filtering on a problem with such a small state space.\n",
+ "It is just to get familiar with the syntax."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 86,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "umbrella_transition_model = [[0.7, 0.3], [0.3, 0.7]]\n",
+ "umbrella_sensor_model = [[0.9, 0.2], [0.1, 0.8]]\n",
+ "hmm = HiddenMarkovModel(umbrella_transition_model, umbrella_sensor_model)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 87,
+ "metadata": {
+ "scrolled": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A']"
+ ]
+ },
+ "execution_count": 87,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "particle_filtering(T, 10, hmm)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We got 5 samples from state `A` and 5 samples from state `B`"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 88,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['A', 'B', 'A', 'B', 'B', 'B', 'B', 'B', 'B', 'B']"
+ ]
+ },
+ "execution_count": 88,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "particle_filtering([F, T, F, F, T], 10, hmm)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This time we got 2 samples from state `A` and 8 samples from state `B`"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Comparing runtimes for these algorithms will not be useful, as each solves the filtering task efficiently for a different scenario.\n",
+ " \n",
+ "`forward_backward` calculates the exact probability distribution.\n",
+ " \n",
+ "`fixed_lag_smoothing` calculates an approximate distribution and its runtime will depend on the value of the lag chosen.\n",
+ " \n",
+ "`particle_filtering` is an efficient method for approximating distributions for a very large or continuous state space."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## MONTE CARLO LOCALIZATION\n",
+ "In the domain of robotics, particle filtering is used for _robot localization_.\n",
+ "__Localization__ is the problem of finding out where things are, in this case, we want to find the position of a robot in a continuous state space.\n",
+ " \n",
+ "__Monte Carlo Localization__ is an algorithm for robots to _localize_ using a _particle filter_.\n",
+ "Given a map of the environment, the algorithm estimates the position and orientation of a robot as it moves and senses the environment.\n",
+ " \n",
+ "Initially, particles are distributed uniformly over the state space, ie the robot has no information of where it is and assumes it is equally likely to be at any point in space.\n",
+ " \n",
+ "When the robot moves, it analyses the incoming evidence to shift and change the probability to better approximate the probability distribution of its position.\n",
+ "The particles are then resampled based on their weights.\n",
+ " \n",
+ "Gradually, as more evidence comes in, the robot gets better at approximating its location and the particles converge towards the actual position of the robot.\n",
+ " \n",
+ "The pose of a robot is defined by its two Cartesian coordinates with values $x$ and $y$ and its direction with value $\\theta$.\n",
+ "We use the kinematic equations of motion to model a deterministic state prediction.\n",
+ "This is our motion model (or transition model).\n",
+ " \n",
+ "Next, we need a sensor model.\n",
+ "There can be two kinds of sensor models, the first assumes that the sensors detect _stable_, _recognizable_ features of the environment called __landmarks__.\n",
+ "The robot senses the location and bearing of each landmark and updates its belief according to that.\n",
+ "We can also assume the noise in measurements to be Gaussian, to simplify things.\n",
+ " \n",
+ "Another kind of sensor model is used for an array of range sensors, each of which has a fixed bearing relative to the robot.\n",
+ "These sensors provide a set of range values in each direction.\n",
+ "This will also be corrupted by Gaussian noise, but we can assume that the errors for different beam directions are independent and identically distributed.\n",
+ " \n",
+ "After evidence comes in, the robot updates its belief state and reweights the particle distribution to better aproximate the actual distribution.\n",
+ " \n",
+ " \n",
+ "Let's have a look at how this algorithm is implemented in the module"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 89,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ " Codestin Search App \n",
+ " \n",
+ " \n",
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "def monte_carlo_localization ( a , z , N , P_motion_sample , P_sensor , m , S = None ): \n",
+ " """Monte Carlo localization algorithm from Fig 25.9""" \n",
+ "\n",
+ " def ray_cast ( sensor_num , kin_state , m ): \n",
+ " return m . ray_cast ( sensor_num , kin_state ) \n",
+ "\n",
+ " M = len ( z ) \n",
+ " W = [ 0 ] * N \n",
+ " S_ = [ 0 ] * N \n",
+ " W_ = [ 0 ] * N \n",
+ " v = a [ 'v' ] \n",
+ " w = a [ 'w' ] \n",
+ "\n",
+ " if S is None : \n",
+ " S = [ m . sample () for _ in range ( N )] \n",
+ "\n",
+ " for i in range ( N ): \n",
+ " S_ [ i ] = P_motion_sample ( S [ i ], v , w ) \n",
+ " W_ [ i ] = 1 \n",
+ " for j in range ( M ): \n",
+ " z_ = ray_cast ( j , S_ [ i ], m ) \n",
+ " W_ [ i ] = W_ [ i ] * P_sensor ( z [ j ], z_ ) \n",
+ "\n",
+ " S = weighted_sample_with_replacement ( N , S_ , W_ ) \n",
+ " return S \n",
+ " \n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "psource(monte_carlo_localization)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Our implementation of Monte Carlo Localization uses the range scan method.\n",
+ "The `ray_cast` helper function casts rays in different directions and stores the range values.\n",
+ " \n",
+ "`a` stores the `v` and `w` components of the robot's velocity.\n",
+ " \n",
+ "`z` is a range scan.\n",
+ " \n",
+ "`P_motion_sample` is the motion or transition model.\n",
+ " \n",
+ "`P_sensor` is the range sensor noise model.\n",
+ " \n",
+ "`m` is the 2D map of the environment\n",
+ " \n",
+ "`S` is a vector of samples of size N"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We'll now define a simple 2D map to run Monte Carlo Localization on.\n",
+ " \n",
+ "Let's say this is the map we want\n",
+ " "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 90,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAfAAAAFaCAYAAADhKw9uAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAASOUlEQVR4nO3df4ztd13n8dd779hAKSwlvaj9oaVaUJao0JGARFYpxIJIMbvZBcUUf6SJP6AQFIsmaGI0ZDWoiQZTC7aJDailArqKdPEHmrDVuQWEclEaiu2FSoclCLrGWnz7x5yScXrnzvSc750zn9PHI7mZ8+M75/v+3Dszz/s958w51d0BAMbyn5Y9AADw4Ak4AAxIwAFgQAIOAAMScAAYkIADwIAEHAAGJOBwiFXVx6vq2Tsue2lV/cUEt91V9dWL3g6wHAIOAAMScBhYVZ1bVW+tqs2quqOqXr7tuqdW1Xur6rNVdXdV/UpVnTG77j2zzT5QVf9YVf+zqr6lqk5U1aur6p7Z57ywqp5XVX9bVZ+pqp/Yz+3Pru+qenlVfayqPl1VP19VfubARHwzwaBmMfy9JB9Icl6SS5O8oqq+bbbJF5K8Msk5SZ4+u/6HkqS7nznb5uu7+6zu/q3Z+S9L8rDZ7b02ya8neUmSS5J8c5LXVtVFe93+Nt+ZZD3JU5JcnuT7plg7kJTXQofDq6o+nq1A3rft4jOS3JrkVUl+p7u/Ytv2r0ny+O7+3pPc1iuS/Nfu/s7Z+U5ycXffPjv/LUn+MMlZ3f2Fqnpkks8leVp33zLb5liSn+nut+3z9p/b3e+cnf+hJP+tuy9d4K8EmFlb9gDAnl7Y3f/n/jNV9dIkP5DkK5OcW1Wf3bbtkSR/Ptvu8Ulen60j4DOz9f1+bI99/b/u/sLs9D/PPn5q2/X/nOSsB3H7d207/XdJzt1j/8A+uQsdxnVXkju6+9Hb/jyyu583u/4NST6SraPsRyX5iSQ14f73c/sXbDv9FUk+OeH+4SFNwGFcf5nkc1X141X18Ko6UlVPqqpvnF1//13g/1hVX5PkB3d8/qeSXJT57XX7SfJjVXV2VV2Q5Kokv3WSbYA5CDgManZX93ck+YYkdyT5dJJrk/zn2SY/muS7knw+W09G2xnPn05y/exZ5P9jjhH2uv0keXu27lZ/f5L/neSNc+wHOAlPYgNOi51PkgOm5QgcAAYk4AAwIHehA8CAHIEDwIAO9IVczjnnnL7wwgsPcpfAijh2bK/XoGE/LrnkkmWPcFoc9NfHQf49Hjt27NPdfXTn5Qd6F/r6+npvbGwc2P6A1VE15WvQPHSt6sOmB/31cZB/j1V1rLvXd17uLnQAGJCAA8CABBwABiTgADAgAQeAAQk4AAxIwAFgQAIOAAMScAAY0EIBr6rLqupvqur2qrp6qqEAgFObO+BVdSTJryZ5bpInJnlxVT1xqsEAgN0tcgT+1CS3d/fHuvveJG9Jcvk0YwEAp7JIwM9Lcte28ydml/0HVXVlVW1U1cbm5uYCuwMA7rdIwE/21i8PeHuW7r6mu9e7e/3o0Qe8GxoAMIdFAn4iyQXbzp+f5JOLjQMA7MciAf+rJBdX1eOq6owkL0ryjmnGAgBOZW3eT+zu+6rqR5L8UZIjSd7U3bdNNhkAsKu5A54k3f0HSf5golkAgH3ySmwAMCABB4ABCTgADEjAAWBAAg4AAxJwABiQgAPAgBb6PXAA2E3Vyd4yg6k4AgeAAQk4AAxIwAFgQAIOAAMScAAYkIADwIAEHAAGJOAAMCABB4ABCTgADEjAAWBAAg4AAxJwABiQgAPAgAQcAAYk4AAwIAEHgAEJOAAMSMABYEACDgADEnAAGJCAA8CABBwABiTgADAgAQeAAQk4AAyouvvgdlZ1cDuDh6iD/J4+SFW17BFWwgH/zD+wfR20A/57PNbd6zsvdwQOAAMScAAYkIADwIAEHAAGJOAAMCABB4ABCTgADEjAAWBAAg4AAxJwABjQ3AGvqguq6k+q6nhV3VZVV005GACwu7UFPve+JK/q7lur6pFJjlXVzd394YlmAwB2MfcReHff3d23zk5/PsnxJOdNNRgAsLtFjsC/qKouTPLkJLec5Lork1w5xX4AgC0Lv51oVZ2V5M+S/Gx337THtqv5PodwiHg7UU7F24lOY/i3E62qL0ny1iQ37BVvAGA6izwLvZK8Mcnx7n79dCMBAHtZ5Aj8GUm+J8mzqur9sz/Pm2guAOAU5n4SW3f/RZLVfYADAA4xr8QGAAMScAAYkIADwIAEHAAGJOAAMCABB4ABCTgADEjAAWBAk7wb2X5dcskl2djYOMhdAsBKcgQOAAMScAAYkIADwIAEHAAGJOAAMCABB4ABCTgADEjAAWBAAg4AAxJwABiQgAPAgAQcAAYk4AAwIAEHgAEJOAAMSMABYEACDgADEnAAGJCAA8CABBwABiTgADAgAQeAAQk4AAxIwAFgQAIOAAMScAAY0NqyBzhdqmrZIwDAaeMIHAAGJOAAMCABB4ABCTgADEjAAWBAAg4AAxJwABiQgAPAgAQcAAYk4AAwoIUDXlVHqup9VfX7UwwEAOxtiiPwq5Icn+B2AIB9WijgVXV+km9Pcu004wAA+7HoEfgvJXl1kn/bbYOqurKqNqpqY3Nzc8HdAQDJAgGvqucnuae7j51qu+6+prvXu3v96NGj8+4OANhmkSPwZyR5QVV9PMlbkjyrqn5zkqkAgFOaO+Dd/ZruPr+7L0zyoiR/3N0vmWwyAGBXfg8cAAa0NsWNdPefJvnTKW4LANibI3AAGJCAA8CABBwABiTgADAgAQeAAQk4AAxIwAFgQJP8Hvhh1N3LHgGYUFUtewQ4VByBA8CABBwABiTgADAgAQeAAQk4AAxIwAFgQAIOAAMScAAYkIADwIAEHAAGJOAAMCABB4ABCTgADEjAAWBAAg4AAxJwABiQgAPAgAQcAAYk4AAwIAEHgAEJOAAMSMABYEACDgADEnAAGJCAA8CABBwABiTgADAgAQeAAQk4AAxIwAFgQAIOAAMScAAYkIADwIAEHAAGJOAAMCABB4ABCTgADGihgFfVo6vqxqr6SFUdr6qnTzUYALC7tQU//5eTvLO7/3tVnZHkzAlmAgD2MHfAq+pRSZ6Z5KVJ0t33Jrl3mrEAgFNZ5C70i5JsJvmNqnpfVV1bVY/YuVFVXVlVG1W1sbm5ucDuAID7LRLwtSRPSfKG7n5ykn9KcvXOjbr7mu5e7+71o0ePLrA7AOB+iwT8RJIT3X3L7PyN2Qo6AHCazR3w7v77JHdV1RNmF12a5MOTTAUAnNKiz0J/WZIbZs9A/1iS7118JABgLwsFvLvfn2R9olkAgH3ySmwAMCABB4ABCTgADEjAAWBAAg4AAxJwABiQgAPAgAQcAAa06CuxkaSqlj0Ch1x3L3sEYMU4AgeAAQk4AAxIwAFgQAIOAAMScAAYkIADwIAEHAAGJOAAMCABB4ABCTgADEjAAWBAAg4AAxJwABiQgAPAgAQ
cAAYk4AAwIAEHgAEJOAAMSMABYEACDgADEnAAGJCAA8CABBwABiTgADAgAQeAAQk4AAxobdkDAOxHdy97BB6kg/w3q6oD29dh4QgcAAYk4AAwIAEHgAEJOAAMSMABYEACDgADEnAAGJCAA8CABBwABiTgADCghQJeVa+sqtuq6kNV9eaqethUgwEAu5s74FV1XpKXJ1nv7iclOZLkRVMNBgDsbtG70NeSPLyq1pKcmeSTi48EAOxl7oB39yeS/EKSO5PcneQfuvtdO7erqiuraqOqNjY3N+efFAD4okXuQj87yeVJHpfk3CSPqKqX7Nyuu6/p7vXuXj969Oj8kwIAX7TIXejPTnJHd292978muSnJN00zFgBwKosE/M4kT6uqM2vrndQvTXJ8mrEAgFNZ5DHwW5LcmOTWJB+c3dY1E80FAJzC2iKf3N0/leSnJpoFANgnr8QGAAMScAAYkIADwIAEHAAGJOAAMCABB4ABCTgADGih3wNnS3cvewSAQ2frRTo5XRyBA8CABBwABiTgADAgAQeAAQk4AAxIwAFgQAIOAAMScAAYkIADwIAEHAAGJOAAMCABB4ABCTgADEjAAWBAAg4AAxJwABiQgAPAgAQcAAYk4AAwIAEHgAEJOAAMSMABYEACDgADEnAAGJCAA8CABBwABiTgADCgtWUPsAqqatkjcMh197JHGJ7vs2kc5NfiQe7rofj14QgcAAYk4AAwIAEHgAEJOAAMSMABYEACDgADEnAAGJCAA8CABBwABrRnwKvqTVV1T1V9aNtlj6mqm6vqo7OPZ5/eMQGA7fZzBH5dkst2XHZ1knd398VJ3j07DwAckD0D3t3vSfKZHRdfnuT62enrk7xw4rkAgFOY9zHwL+3uu5Nk9vGxu21YVVdW1UZVbWxubs65OwBgu9P+JLbuvqa717t7/ejRo6d7dwDwkDBvwD9VVV+eJLOP90w3EgCwl3kD/o4kV8xOX5Hk7dOMAwDsx35+jezNSd6b5AlVdaKqvj/J65I8p6o+muQ5s/MAwAFZ22uD7n7xLlddOvEsAMA+eSU2ABiQgAPAgAQcAAYk4AAwIAEHgAEJOAAMSMABYEACDgADqu4+uJ1VbSb5uwf5aeck+fRpGOcwWNW1req6ktVd26quK1ndta3qupLVXdu86/rK7n7Au4EdaMDnUVUb3b2+7DlOh1Vd26quK1ndta3qupLVXduqritZ3bVNvS53oQPAgAQcAAY0QsCvWfYAp9Gqrm1V15Ws7tpWdV3J6q5tVdeVrO7aJl3XoX8MHAB4oBGOwAGAHQQcAAZ0qANeVZdV1d9U1e1VdfWy55lCVV1QVX9SVcer6raqumrZM02tqo5U1fuq6veXPctUqurRVXVjVX1k9m/39GXPNJWqeuXsa/FDVfXmqnrYsmeaR1W9qaruqaoPbbvsMVV1c1V9dPbx7GXOOK9d1vbzs6/Hv66q362qRy9zxnmcbF3brvvRquqqOmcZsy1qt7VV1ctmXbutqv7XIvs4tAGvqiNJfjXJc5M8McmLq+qJy51qEvcleVV3f22SpyX54RVZ13ZXJTm+7CEm9stJ3tndX5Pk67Mi66uq85K8PMl6dz8pyZEkL1ruVHO7LsllOy67Osm7u/viJO+enR/RdXng2m5O8qTu/rokf5vkNQc91ASuywPXlaq6IMlzktx50ANN6LrsWFtVfWuSy5N8XXf/lyS/sMgODm3Akzw1ye3d/bHuvjfJW7K18KF1993dfevs9OezFYLzljvVdKrq/CTfnuTaZc8ylap6VJJnJnljknT3vd392eVONam1JA+vqrUkZyb55JLnmUt3vyfJZ3ZcfHmS62enr0/ywgMdaiInW1t3v6u775ud/b9Jzj/wwRa0y79ZkvxiklcnGfZZ1rus7QeTvK67/2W2zT2L7OMwB/y8JHdtO38iKxS6JKmqC5M8Ockty51kUr+UrW+8f1v2IBO6KMlmkt+YPTRwbVU9YtlDTaG7P5Gto4A7k9yd5B+6+13LnWpSX9rddydb/3lO8tglz3O6fF+SP1z2EFOoqhck+UR3f2DZs5wGj0/yzVV1S1X9WVV94yI3dpgDXie5bNj/je1UVWcleWuSV3T355Y9zxSq6vlJ7unuY8ueZWJrSZ6S5A3d/eQk/5Rx74r9D2aPCV+e5HFJzk3yiKp6yXKn4sGoqp/M1kNzNyx7lkVV1ZlJfjLJa5c9y2myluTsbD18+mNJfruqTta6fTnMAT+R5IJt58/PoHft7VRVX5KteN/Q3Tcte54JPSPJC6rq49l6yONZVfWbyx1pEieSnOju++8puTFbQV8Fz05yR3dvdve/JrkpyTcteaYpfaqqvjxJZh8XusvysKmqK5I8P8l392q8qMdXZes/kx+Y/Rw5P8mtVfVlS51qOieS3NRb/jJb91TO/SS9wxzwv0pycVU9rqrOyNYTa96x5JkWNvvf1huTHO/u1y97nil192u6+/zuvjBb/15/3N3DH811998nuauqnjC76NIkH17iSFO6M8nTqurM2dfmpVmRJ+jNvCPJFbPTVyR5+xJnmVRVXZbkx5O8oLv//7LnmUJ3f7C7H9vdF85+jpxI8pTZ9+AqeFuSZyVJVT0+yRlZ4F3XDm3AZ0/O+JEkf5StHyi/3d23LXeqSTwjyfdk6+j0/bM/z1v2UOzpZUluqKq/TvINSX5uyfNMYnavwo1Jbk3ywWz9TBjyZSyr6s1J3pvkCVV1oqq+P8nrkjynqj6arWc1v26ZM85rl7X9SpJHJrl59nPk15Y65Bx2WddK2GVtb0py0exXy96S5IpF7jnxUqoAMKBDewQOAOxOwAFgQAIOAAMScAAYkIADwIAEHAAGJOAAMKB/B24h+wUcnnY9AAAAAElFTkSuQmCC\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "m = MCLmap([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0],\n",
+ " [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0],\n",
+ " [1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0],\n",
+ " [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0],\n",
+ " [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0],\n",
+ " [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0],\n",
+ " [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0],\n",
+ " [0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0],\n",
+ " [0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0],\n",
+ " [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0],\n",
+ " [0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0]])\n",
+ "\n",
+ "heatmap(m.m, cmap='binary')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's define the motion model as a function `P_motion_sample`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 91,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def P_motion_sample(kin_state, v, w):\n",
+ " \"\"\"Sample from possible kinematic states.\n",
+ " Returns from a single element distribution (no uncertainity in motion)\"\"\"\n",
+ " pos = kin_state[:2]\n",
+ " orient = kin_state[2]\n",
+ "\n",
+ " # for simplicity the robot first rotates and then moves\n",
+ " orient = (orient + w)%4\n",
+ " for _ in range(orient):\n",
+ " v = (v[1], -v[0])\n",
+ " pos = vector_add(pos, v)\n",
+ " return pos + (orient,)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Define the sensor model as a function `P_sensor`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 92,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def P_sensor(x, y):\n",
+ " \"\"\"Conditional probability for sensor reading\"\"\"\n",
+ " # Need not be exact probability. Can use a scaled value.\n",
+ " if x == y:\n",
+ " return 0.8\n",
+ " elif abs(x - y) <= 2:\n",
+ " return 0.05\n",
+ " else:\n",
+ " return 0"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Initializing variables."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 93,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "a = {'v': (0, 0), 'w': 0}\n",
+ "z = (2, 4, 1, 6)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's run `monte_carlo_localization` with these parameters to find a sample distribution S."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 94,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "S = monte_carlo_localization(a, z, 1000, P_motion_sample, P_sensor, m)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's plot the values in the sample distribution `S`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 95,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "GRID:\n",
+ " 0 0 12 0 143 14 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 17 52 201 6 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 3 5 19 9 3 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 6 166 0 21 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 1 11 75 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 73 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0\n",
+ "124 0 0 0 0 0 0 1 0 3 0 0 0 0 0 0 0\n",
+ " 0 0 0 14 4 15 1 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"
+ ]
+ },
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAfAAAAFaCAYAAADhKw9uAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAATEElEQVR4nO3df6zldX3n8debuSK/F2Swlt+yi7pq2upOjdbU7Qqs+KNis5td7dJg2w1Ju1U0thZtIt1s0pi2cdukjV0WLSQl2i7S6nZbFW271qyLHVBUxFYiCKMIA4aCXSsF3vvHPSS317lzh3u+c858Lo9HMrn3nPO95/P+zNy5z/mee+6Z6u4AAGM5bNkDAACPn4ADwIAEHAAGJOAAMCABB4ABCTgADEjAAWBAAg6HsKq6varOXXfd66vqkxPcd1fVP5v3foDlEHAAGJCAw8Cq6uSq+kBV7a2q26rqjWtue0FVfaqq7q+qu6rqt6rq8Nltn5gddlNVfauq/n1V/UhV7amqt1bVPbOPeU1VvaKq/qaqvllVbz+Q+5/d3lX1xqr6SlXdW1W/VlW+5sBE/GWCQc1i+D+T3JTklCTnJHlTVb1sdsgjSd6cZGeSF81u/9kk6e6XzI75/u4+prt/f3b5aUmOmN3fO5L89yQXJvkXSX44yTuq6qzN7n+NH0uyK8nzk1yQ5Kem2DuQlNdCh0NXVd2e1UA+vObqw5PcmOQtSf5Hd5++5vi3JXlGd//kPu7rTUn+ZXf/2OxyJzm7u2+dXf6RJH+a5JjufqSqjk3yQJIXdvf1s2NuSPJfuvuPDvD+X97dH55d/tkk/6a7z5njtwSYWVn2AMCmXtPdH3vsQlW9Psl/THJGkpOr6v41x+5I8pez456R5F1ZPQM+Kqt/32/YZK37uvuR2fvfnr29e83t305yzOO4/zvXvP/VJCdvsj5wgDyEDuO6M8lt3X38ml/HdvcrZre/O8mXsnqWfVyStyepCdc/kPs/bc37pyf5+oTrwxOagMO4Pp3kgar6xao6sqp2VNVzq+oHZ7c/9hD4t6rqWUl+Zt3H353krGzdZvefJL9QVSdU1WlJLkny+/s4BtgCAYdBzR7q/tEkP5DktiT3JrkiyT+ZHfLzSX48yYNZfTLa+nj+cpKrZs8i/3dbGGGz+0+SD2b1YfXPJvlfSd6zhXWAffAkNuCgWP8kOWBazsABYEACDgAD8hA6AAzIGTgADGihL+Syc+eJfebpp21+4GgefWTzY6Z02I6FLfXQbZ9b2FqHn/Gcha21yN9DgHnc8Jmb7u3uk9Zfv9CAn3n6adn9yY9tfuBg+u8fWOh6dcRxC1vr9gtPWdhaZ1zxhwtbq444fmFrAcyjjj7pq/u63kPoADAgAQeAAQk4AAxIwAFgQAIOAAMScAAYkIADwIAEHAAGJOAAMKC5Al5V51fVX1fVrVV16VRDAQD7t+WAV9WOJL+d5OVJnp3kdVX17KkGAwA2Ns8Z+AuS3NrdX+nuh5K8P8kF04wFAOzPPAE/Jcmday7vmV33j1TVxVW1u6p27733vjmWAwAeM0/Aax/X9Xdd0X15d+/q7l0n7TxxjuUAgMfME/A9Sdb+596nJvn6fOMAAAdinoD/VZKzq+rpVXV4ktcm+dA0YwEA+7Oy1Q/s7oer6ueSfCTJjiTv7e6bJ5sMANjQlgOeJN39J0n+ZKJZAIAD5JXYAGBAAg4AAxJwABiQgAPAgAQcAAYk4AAwIAEHgAHN9XPgrKojjlv2CAfNGZfftLC1+va/XNha//nHL17YWkly2advX9hatfLkha0FLI8zcAAYkIADwIAEHAAGJOAAMCABB4ABCTgADEjAAWBAAg4AAxJwABiQgAPAgAQcAAYk4AAwIAEHgAEJOAAMSMABYEACDgADEnAAGJCAA8CABBwABiTgADAgAQeAAQk4AAxIwAFgQAIOAAMScAAYkIADwIBWlj0Ah7Y6aufi1nrWjy5srV++8a6FrQVwMDgDB4ABCTgADEjAAWBAAg4AAxJwABiQgAPAgAQcAAYk4AAwIAEHgAEJOAAMaMsBr6rTqurPq+qWqrq5qi6ZcjAAYGPzvBb6w0ne0t03VtWxSW6oquu6+4sTzQYAbGDLZ+DdfVd33zh7/8EktyQ5ZarBAICNTfI98Ko6M8nzkly/j9surqrdVbV77733TbEcADzhzR3wqjomyQeSvKm7H1h/e3df3t27unvXSTtPnHc5ACBzBryqnpTVeF/d3ddOMxIAsJl5noVeSd6T5Jbuftd0IwEAm5nnDPzFSX4iyUur6rOzX6+YaC4AYD+2/GNk3f3JJDXhLADAAfJKbAAwIAEHgAEJOAAMSMABYEACDgADEnAAGJCAA8CABBwABjTP/wfOkvSjjyxwsQWu9e37F7fW4Ucvbq0kWTliYUvVYTsWthawPM7AAWBAAg4AAxJwABiQgAPAgAQcAAYk4AAwIAEHgAEJOAAMSMABYEACDgADEnAAGJCAA8CABBwABiTgADAgAQeAAQk4AAxIwAFgQAIOAAMScAAYkIADwIAEHAAGJOAAMCABB4ABCTgADEjAAWBAAg4AA1pZ9gA8fnXYjgWutsC1jnnq4tYCGJwzcAAYkIADwIAEHAAGJOAAMCABB4ABCTgADEjAAWBAAg4AAxJwABiQgAPAgOYOeFXtqKrPVNUfTzEQALC5Kc7AL0lyywT3AwAcoLkCXlWnJnllkiumGQcAOBDznoH/RpK3Jnl0owOq6uKq2l1Vu/fee9+cywEAyRwBr6pXJbmnu2/Y33HdfXl37+ruXSftPHGrywEAa8xzBv7iJK+uqtuTvD/JS6vq9yaZCgDYry0HvLvf1t2ndveZSV6b5M+6+8LJJgMANuTnwAFgQCtT3El3/0WSv5jivgCAzTkDB4ABCTgADEjAAWBAAg4AAxJwABiQgAPAgAQcAAY0yc+BP9H1w99Z6HrXvfL0ha31rz9y98LW6ge/sbC16tinLWwtgIPBGTgADEjAAWBAAg4AAxJwABiQgAPAgAQcAAYk4AAwIAEHgAEJOAAMSMABYEACDgADEnAAGJCAA8CABBwABiTgADAgAQeAAQk4AAxIwAFgQAIOAAMScAAYkIADwIAEHAAGJOAAMCABB4ABCTgADEjAAWBAAg4AA1pZ9gDbQa08eaHrnffhbyxsrf7Og4tb6//8t4WtVS+7bGFrARwMzsABYEACDgADEnAAGJCAA8CABBwABiTgADAgAQeAAQk4AAxIwAFgQHMFvKqOr6prqupLVXVLVb1oqsEAgI3N+1Kqv5nkw939b6vq8CRHTTATALCJLQe8qo5L8pIkr0+S7n4oyUPTjAUA7M88D6GflWRvkt+tqs9U1RVVdfT6g6rq4qraXVW799573xzLAQCPmSfgK0men+Td3f28JH+X5NL1B3X35d29q7t3nbTzxDmWAwAeM0/A9yTZ093Xzy5fk9WgAwAH2ZYD3t3fSHJnVT1zdtU5Sb44yVQAwH7N+yz0NyS5evYM9K8k+cn5RwI
ANjNXwLv7s0l2TTQLAHCAvBIbAAxIwAFgQAIOAAMScAAYkIADwIAEHAAGJOAAMCABB4ABzftKbI/PA3fl0Y/9ykKWOuzcty9knWWoqsUt9uRjF7ZUveyyha3FNLp7YWst9PMeBuAMHAAGJOAAMCABB4ABCTgADEjAAWBAAg4AAxJwABiQgAPAgAQcAAYk4AAwIAEHgAEJOAAMSMABYEACDgADEnAAGJCAA8CABBwABiTgADAgAQeAAQk4AAxIwAFgQAIOAAMScAAYkIADwIAEHAAGJOAAMKCVRS72yAPfzLc+evVC1jru3LcvZB04EN29sLWqamFrpR9d3Fq1Y3FrwQCcgQPAgAQcAAYk4AAwIAEHgAEJOAAMSMABYEACDgADEnAAGJCAA8CABBwABjRXwKvqzVV1c1V9oareV1VHTDUYALCxLQe8qk5J8sYku7r7uUl2JHntVIMBABub9yH0lSRHVtVKkqOSfH3+kQCAzWw54N39tSS/nuSOJHcl+dvu/uj646rq4qraXVW77/v2Av/nIgDYxuZ5CP2EJBckeXqSk5McXVUXrj+uuy/v7l3dvevEIz1nDgCmME9Rz01yW3fv7e5/SHJtkh+aZiwAYH/mCfgdSV5YVUdVVSU5J8kt04wFAOzPPN8Dvz7JNUluTPL52X1dPtFcAMB+rMzzwd19WZLLJpoFADhAnlUGAAMScAAYkIADwIAEHAAGJOAAMCABB4ABCTgADGiunwN/vHac+pwc96sfW+SS21L//f2LW+xJRy9urYceXNxaR5ywuLWSrL5Y4fZTh+1Y9gjwhOUMHAAGJOAAMCABB4ABCTgADEjAAWBAAg4AAxJwABiQgAPAgAQcAAYk4AAwIAEHgAEJOAAMSMABYEACDgADEnAAGJCAA8CABBwABiTgADAgAQeAAQk4AAxIwAFgQAIOAAMScAAYkIADwIAEHAAGJOAAMCABB4ABrSx7AB6/OuL4ZY9wcBz5lGVPADAMZ+AAMCABB4ABCTgADEjAAWBAAg4AAxJwABiQgAPAgAQcAAYk4AAwoE0DXlXvrap7quoLa657SlVdV1Vfnr094eCOCQCsdSBn4FcmOX/ddZcm+Xh3n53k47PLAMCCbBrw7v5Ekm+uu/qCJFfN3r8qyWsmngsA2I+tfg/8e7r7riSZvX3qRgdW1cVVtbuqdu+9974tLgcArHXQn8TW3Zd3967u3nXSzhMP9nIA8ISw1YDfXVXfmySzt/dMNxIAsJmtBvxDSS6avX9Rkg9OMw4AcCAO5MfI3pfkU0meWVV7quqnk7wzyXlV9eUk580uAwALsrLZAd39ug1uOmfiWQCAA+SV2ABgQAIOAAMScAAYkIADwIAEHAAGJOAAMCABB4ABCTgADKi6e3GLVe1N8tXH+WE7k9x7EMY5FGzXvW3XfSXbd2/bdV/J9t3bdt1Xsn33ttV9ndHdJ62/cqEB34qq2t3du5Y9x8GwXfe2XfeVbN+9bdd9Jdt3b9t1X8n23dvU+/IQOgAMSMABYEAjBPzyZQ9wEG3XvW3XfSXbd2/bdV/J9t3bdt1Xsn33Num+DvnvgQMA322EM3AAYB0BB4ABHdIBr6rzq+qvq+rWqrp02fNMoapOq6o/r6pbqurmqrpk2TNNrap2VNVnquqPlz3LVKrq+Kq6pqq+NPuze9GyZ5pKVb159rn4hap6X1UdseyZtqKq3ltV91TVF9Zc95Squq6qvjx7e8IyZ9yqDfb2a7PPx89V1R9W1fHLnHEr9rWvNbf9fFV1Ve1cxmzz2mhvVfWGWddurqpfnWeNQzbgVbUjyW8neXmSZyd5XVU9e7lTTeLhJG/p7n+e5IVJ/tM22ddalyS5ZdlDTOw3k3y4u5+V5PuzTfZXVackeWOSXd393CQ7krx2uVNt2ZVJzl933aVJPt7dZyf5+OzyiK7Md+/tuiTP7e7vS/I3Sd626KEmcGW+e1+pqtOSnJfkjkUPNKErs25vVfWvklyQ5Pu6+zlJfn2eBQ7ZgCd5QZJbu/sr3f1QkvdndeND6+67uvvG2fsPZjUEpyx3qulU1alJXpnkimXPMpWqOi7JS5K8J0m6+6Huvn+5U01qJcmRVbWS5KgkX1/yPFvS3Z9I8s11V1+Q5KrZ+1clec1Ch5rIvvbW3R/t7odnF/9vklMXPticNvgzS5L/muStSYZ9lvUGe/uZJO/s7u/MjrlnnjUO5YCfkuTONZf3ZBuFLkmq6swkz0ty/XInmdRvZPUv3qPLHmRCZyXZm+R3Z98auKKqjl72UFPo7q9l9SzgjiR3Jfnb7v7ocqea1Pd0913J6j+ekzx1yfMcLD+V5E+XPcQUqurVSb7W3Tcte5aD4BlJfriqrq+q/11VPzjPnR3KAa99XDfsv8bWq6pjknwgyZu6+4FlzzOFqnpVknu6+4ZlzzKxlSTPT/Lu7n5ekr/LuA/F/iOz7wlfkOTpSU5OcnRVXbjcqXg8quqXsvqtuauXPcu8quqoJL+U5B3LnuUgWUlyQla/ffoLSf6gqvbVugNyKAd8T5LT1lw+NYM+tLdeVT0pq/G+uruvXfY8E3pxkldX1e1Z/ZbHS6vq95Y70iT2JNnT3Y89UnJNVoO+HZyb5Lbu3tvd/5Dk2iQ/tOSZpnR3VX1vkszezvWQ5aGmqi5K8qok/6G3x4t6/NOs/mPyptnXkVOT3FhVT1vqVNPZk+TaXvXprD5SueUn6R3KAf+rJGdX1dOr6vCsPrHmQ0ueaW6zf229J8kt3f2uZc8zpe5+W3ef2t1nZvXP68+6e/izue7+RpI7q+qZs6vOSfLFJY40pTuSvLCqjpp9bp6TbfIEvZkPJblo9v5FST64xFkmVVXnJ/nFJK/u7v+37Hmm0N2f7+6ndveZs68je5I8f/Z3cDv4oyQvTZKqekaSwzPH/7p2yAZ89uSMn0vykax+QfmD7r55uVNN4sVJfiKrZ6efnf16xbKHYlNvSHJ1VX0uyQ8k+ZUlzzOJ2aMK1yS5Mcnns/o1YciXsayq9yX5VJJnVtWeqvrpJO9Mcl5VfTmrz2p+5zJn3KoN9vZbSY5Nct3s68jvLHXILdhgX9vCBnt7b5KzZj9a9v4kF83zyImXUgWAAR2yZ+AAwMYEHAAGJOAAMCABB4ABCTgADEjAAWBAAg4AA/r/85kBLqIO9qEAAAAASUVORK5CYII=\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "grid = [[0]*17 for _ in range(11)]\n",
+ "for x, y, _ in S:\n",
+ " if 0 <= x < 11 and 0 <= y < 17:\n",
+ " grid[x][y] += 1\n",
+ "print(\"GRID:\")\n",
+ "print_table(grid)\n",
+ "heatmap(grid, cmap='Oranges')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The distribution is highly concentrated at `(5, 3)`, but the robot is not very confident about its position as some other cells also have high probability values."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's look at another scenario."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 96,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "GRID:\n",
+ "0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ "0 0 0 0 0 0 0 0 1000 0 0 0 0 0 0 0 0\n",
+ "0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ "0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ "0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ "0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ "0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ "0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ "0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ "0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ "0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n"
+ ]
+ },
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAfAAAAFaCAYAAADhKw9uAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAARl0lEQVR4nO3df6zld13n8dd7OzbQFpbaKUp/YOluwWWJSnckIJF1KWQLshSzmxV2MUXdNNEVCkGxaIIkm2zIalhNNJhuwTaxAd1SBV1FKv5gSdjqtFChFKWh0A5UOlOCoGu2Ft/7xz01l8vcucM9Z+bM+/J4JJN7fnzv+b4/nbn3eb/fc+5pdXcAgFn+0boHAAC+dgIOAAMJOAAMJOAAMJCAA8BAAg4AAwk4AAwk4HAKq6pPVdXzttz2iqr6wAoeu6vqny77OMB6CDgADCTgMFhVnVdV76yqw1V1T1W9atN9z6iqD1bVF6rq/qr6xao6fXHf+xeb3VFVf11V319V31NVh6rqdVX1wOJzXlJVL6yqv6iqz1fVTx3P4y/u76p6VVV9sqqOVNXPVpXvObAivphgqEUMfyvJHUnOT3JZkldX1b9ebPLlJK9Jsj/Jsxb3/2iSdPdzFtt8e3ef1d2/trj+zUketXi8NyT5H0lenuRfJPnuJG+oqot3evxNvi/JgSSXJrkiyQ+tYu1AUt4LHU5dVfWpbATy4U03n57k9iSvTfI/u/uJm7Z/fZInd/cPHuWxXp3kX3b39y2ud5JLuvvuxfXvSfK7Sc7q7i9X1WOSfDHJM7v71sU2tyX5L939m8f5+C/o7vcsrv9okn/b3Zct8Z8EWNi37gGAHb2ku3//kStV9Yok/ynJtyQ5r6q+sGnb05L878V2T07y5mwcAZ+Rja/323bY14Pd/eXF5b9dfPzcpvv/NslZX8Pj37fp8qeTnLfD/oHj5BQ6zHVfknu6+3Gb/jymu1+4uP8tST6ejaPsxyb5qSS1wv0fz+NfuOnyE5N8doX7h69rAg5z/UmSL1bVT1bVo6vqtKp6WlV95+L+R06B/3VVfWuSH9ny+Z9LcnF2b6fHT5KfqKqzq+rCJFcn+bWjbAPsgoDDUItT3f8myXckuSfJkSTXJfnHi01+PMl/SPKlbLwYbWs835jkhsWryP/9LkbY6fGT5F3ZOK3+4ST/K8lbd7Ef4Ci8iA04Iba+SA5YLUfgADCQgAPAQE6hA8BAjsABYKCT+kYu+/ef0xc98cKdNwQAkiS3feiOI9197tbbT2rAL3rihTn4gd/feUMAIElSZ5776aPd7hQ6AAwk4AAwkIADwEACDgADCTgADCTgADCQgAPAQAIOAAMJOAAMtFTAq+ryqvrzqrq7qq5Z1VAAwLHtOuBVdVqSX0rygiRPTfKyqnrqqgYDALa3zBH4M5Lc3d2f7O6HkrwjyRWrGQsAOJZlAn5+kvs2XT+0uO0rVNVVVXWwqg4ePvLgErsDAB6xTMDrKLf1V93QfW13H+juA+fuP2eJ3QEAj1gm4IeSbP6fe1+Q5LPLjQMAHI9lAv6nSS6pqidV1elJXprk3asZCwA4ln27/cTufriqfizJ7yU5LcnbuvvOlU0GAGxr1wFPku7+nSS/s6JZAIDj5J3YAGAgAQeAgQQcAAYScAAYSMABYCABB4CBBBwABlrq98CBU88bL33CydvX7feftH0BX8kROAAMJOAAMJCAA8BAAg4AAwk4AAwk4AAwkIADwEACDgADCTgADCTgADCQgAPAQAIOAAMJOAAMJOAAMJCAA8BAAg4AAwk4AAwk4AAwkIADwEACDgADCTgADCTgADCQgAPAQAIOAAMJOAAMJOAAMNC+dQ8ArNYbb79/3SMAJ4EjcAAYSMABYCABB4CBBBwABhJwABhIwAFgIAEHgIEEHAAGEnAAGEjAAWCgXQe8qi6sqj+sqruq6s6qunqVgwEA21vmvdAfTvLa7r69qh6T5LaquqW7P7ai2QCAbez6CLy77+/u2xeXv5TkriTnr2owAGB7K3kOvKouSvL0JLce5b6rqupgVR08fOTBVewOAL7uLR3wqjoryTuTvLq7v7j1/u6+trsPdPeBc/efs+zuAIAsGfCq+oZsxPvG7r55NSMBADtZ5lXoleStSe7q7jevbiQAYCfLHIE/O8kPJHluVX148eeFK5oLADiGXf8aWXd/IEmtcBYA4Dh5JzYAGEjAAWAgAQeAgQQcAAYScAAYSMABYCABB4CBBBwABhJwABhIwAFgIAEHgIEEHAAGEnAAGEjAAWAgAQeAgQQcAAYScAAYSMABYCABB4CBBBwABhJwABhIwAFgIAEHgIEEHAAGEnAAGEjAAWAgAQeAgQQcAAYScAAYSMABYCABB4CBBBwABhJwABhIwAFgIAEHgIEEHAAGEnAAGEjAAWAgAQeAgQQcAAYScAAYSMABYCABB4CBBBwABhJwABhIwAFgoKUDXlWnVdWHquq3VzEQALCzVRyBX53krhU8DgBwnJYKeFVdkOR7k1y3mnEAgOOx7BH4zyd5XZK/326Dqrqqqg5W1cHDRx5ccncAQLJEwKvqRUke6O7bjrVdd1/b3Qe6+8C5+8/Z7e4AgE2WOQJ/dpIXV9WnkrwjyXOr6ldXMhUAcEy7Dnh3v767L+jui5K8NMkfdPfLVzYZALAtvwcOAAPtW8WDdPcfJfmjVTwWALAzR+AAMJCAA8BAAg4AAwk4AAwk4AAwkIADwEACDgADCTgADCTgADCQgAPAQAIOAAMJOAAMJOAAMJCAA8BAAg4AAwk4AAwk4AAwkIADwEACDgADCTgADCTgADCQgAPAQAIOAAMJOAAMJOAAMJCAA8BAAg4AAwk4AAwk4AAwkIADwEACDgADCTgADCTgADCQgAPAQAIOAAMJOAAMJOAAMJCAA8BAAg4AAwk4AAwk4AAwkIADwEACDgADCTgADCTgADDQUgGvqsdV1U1V9fGququqnrWqwQCA7e1b8vN/Icl7uvvfVdXpSc5YwUwAwA52HfCqemyS5yR5RZJ090NJHlrNWADAsSxzCv3iJIeT/EpVfaiqrquqM7duVFVXVdXBqjp4+MiDS+wOAHjEMgHfl+TSJG/p7qcn+Zsk12zdqLuv7e4D3X3g3P3nLLE7AOARywT8UJJD3X3r4vpN2Qg6AHCC7Trg3f2XSe6rqqcsbrosycdWMhUAcEzLvgr9lUluXLwC/ZNJfnD5kQCAnSwV8O7+cJIDK5oFADhO3okNAAYScAAYSMABYCABB4CBBBwABhJwABhIwAFgIAEHgIEEHAAGEnAAGEjAAWAgAQeAgQQcAAYScAAYSMABYCABB4CBBBwABhJwABhIwAFgIAEHgIEEHAAGEnAAGEjAAWAgAQeAgQQcAAYScAAYSMABYCABB4CBBBwABhJwABhIwAFgIAEHgIEEHAAGEnAAGEjAAWAgAQeAgQQcAAYScAAYSMABYCABB4CBBBwABhJwABhIwAFgIAEHgIEEHAAGEnAAGGipgFfVa6rqzqr6aFW
9vaoetarBAIDt7TrgVXV+klclOdDdT0tyWpKXrmowAGB7y55C35fk0VW1L8kZST67/EgAwE52HfDu/kySn0tyb5L7k/xVd79363ZVdVVVHayqg4ePPLj7SQGAf7DMKfSzk1yR5ElJzktyZlW9fOt23X1tdx/o7gPn7j9n95MCAP9gmVPoz0tyT3cf7u6/S3Jzku9azVgAwLEsE/B7kzyzqs6oqkpyWZK7VjMWAHAsyzwHfmuSm5LcnuQji8e6dkVzAQDHsG+ZT+7un0nyMyuaBQA4Tt6JDQAGEnAAGEjAAWAgAQeAgQQcAAYScAAYSMABYCABB4CBBBwABhJwABhIwAFgIAEHgIEEHAAGEnAAGEjAAWAgAQeAgQQcAAYScAAYSMABYCABB4CBBBwABhJwABhIwAFgIAEHgIEEHAAGEnAAGEjAAWAgAQeAgQQcAAYScAAYSMABYCABB4CBBBwABhJwABhIwAFgIAEHgIEEHAAGEnAAGEjAAWAgAQeAgQQcAAYScAAYSMABYCABB4CBBBwABtox4FX1tqp6oKo+uum2b6yqW6rqE4uPZ5/YMQGAzY7nCPz6JJdvue2aJO/r7kuSvG9xHQA4SXYMeHe/P8nnt9x8RZIbFpdvSPKSFc8FABzDbp8D/6buvj9JFh8fv92GVXVVVR2sqoOHjzy4y90BAJud8Bexdfe13X2guw+cu/+cE707APi6sNuAf66qnpAki48PrG4kAGAnuw34u5Ncubh8ZZJ3rWYcAOB4HM+vkb09yQeTPKWqDlXVDyd5U5LnV9Unkjx/cR0AOEn27bRBd79sm7suW/EsAMBx8k5sADCQgAPAQAIOAAMJOAAMJOAAMJCAA8BAAg4AAwk4AAxU3X3ydlZ1OMmnv8ZP25/kyAkY51SwV9e2V9eV7N217dV1JXt3bXt1XcneXdtu1/Ut3X3u1htPasB3o6oOdveBdc9xIuzVte3VdSV7d217dV3J3l3bXl1XsnfXtup1OYUOAAMJOAAMNCHg1657gBNor65tr64r2btr26vrSvbu2vbqupK9u7aVruuUfw4cAPhqE47AAYAtBBwABjqlA15Vl1fVn1fV3VV1zbrnWYWqurCq/rCq7qqqO6vq6nXPtGpVdVpVfaiqfnvds6xKVT2uqm6qqo8v/u6ete6ZVqWqXrP4t/jRqnp7VT1q3TPtRlW9raoeqKqPbrrtG6vqlqr6xOLj2euccbe2WdvPLv49/llV/UZVPW6dM+7G0da16b4fr6quqv3rmG1Z262tql656NqdVfXfltnHKRvwqjotyS8leUGSpyZ5WVU9db1TrcTDSV7b3f8syTOT/Oc9sq7Nrk5y17qHWLFfSPKe7v7WJN+ePbK+qjo/yauSHOjupyU5LclL1zvVrl2f5PItt12T5H3dfUmS9y2uT3R9vnpttyR5Wnd/W5K/SPL6kz3UClyfr15XqurCJM9Pcu/JHmiFrs+WtVXVv0pyRZJv6+5/nuTnltnBKRvwJM9Icnd3f7K7H0ryjmwsfLTuvr+7b19c/lI2QnD+eqdanaq6IMn3Jrlu3bOsSlU9Nslzkrw1Sbr7oe7+wnqnWql9SR5dVfuSnJHks2ueZ1e6+/1JPr/l5iuS3LC4fEOSl5zUoVbkaGvr7vd298OLq/8nyQUnfbAlbfN3liT/Pcnrkox9lfU2a/uRJG/q7v+32OaBZfZxKgf8/CT3bbp+KHsodElSVRcleXqSW9c7yUr9fDa+8P5+3YOs0MVJDif5lcVTA9dV1ZnrHmoVuvsz2TgKuDfJ/Un+qrvfu96pVuqbuvv+ZOOH5ySPX/M8J8oPJfnddQ+xClX14iSf6e471j3LCfDkJN9dVbdW1R9X1Xcu82CncsDrKLeN/Wlsq6o6K8k7k7y6u7+47nlWoapelOSB7r5t3bOs2L4klyZ5S3c/PcnfZO6p2K+weE74iiRPSnJekjOr6uXrnYqvRVX9dDaemrtx3bMsq6rOSPLTSd6w7llOkH1Jzs7G06c/keTXq+porTsup3LADyW5cNP1CzL01N5WVfUN2Yj3jd1987rnWaFnJ3lxVX0qG095PLeqfnW9I63EoSSHuvuRMyU3ZSPoe8HzktzT3Ye7+++S3Jzku9Y80yp9rqqekCSLj0udsjzVVNWVSV6U5D/23nhTj3+SjR8m71h8H7kgye1V9c1rnWp1DiW5uTf8STbOVO76RXqncsD/NMklVfWkqjo9Gy+sefeaZ1ra4qettya5q7vfvO55Vqm7X9/dF3T3Rdn4+/qD7h5/NNfdf5nkvqp6yuKmy5J8bI0jrdK9SZ5ZVWcs/m1elj3yAr2Fdye5cnH5yiTvWuMsK1VVlyf5ySQv7u7/u+55VqG7P9Ldj+/uixbfRw4luXTxNbgX/GaS5yZJVT05yelZ4v+6dsoGfPHijB9L8nvZ+Iby691953qnWolnJ/mBbBydfnjx54XrHoodvTLJjVX1Z0m+I8l/XfM8K7E4q3BTktuTfCQb3xNGvo1lVb09yQeTPKWqDlXVDyd5U5LnV9UnsvGq5jetc8bd2mZtv5jkMUluWXwf+eW1DrkL26xrT9hmbW9LcvHiV8vekeTKZc6ceCtVABjolD0CBwC2J+AAMJCAA8BAAg4AAwk4AAwk4AAwkIADwED/H3ZBvi8oWJldAAAAAElFTkSuQmCC\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "a = {'v': (0, 1), 'w': 0}\n",
+ "z = (2, 3, 5, 7)\n",
+ "S = monte_carlo_localization(a, z, 1000, P_motion_sample, P_sensor, m, S)\n",
+ "grid = [[0]*17 for _ in range(11)]\n",
+ "for x, y, _ in S:\n",
+ " if 0 <= x < 11 and 0 <= y < 17:\n",
+ " grid[x][y] += 1\n",
+ "print(\"GRID:\")\n",
+ "print_table(grid)\n",
+ "heatmap(grid, cmap='Oranges')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "In this case, the robot is 99.9% certain that it is at position `(6, 7)`."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## DECISION THEORETIC AGENT\n",
+ "We now move into the domain of probabilistic decision making.\n",
+ " \n",
+ "To make choices between different possible plans in a certain situation in a given environment, an agent must have _preference_ between the possible outcomes of the various plans.\n",
+ " \n",
+ "__Utility theory__ is used to represent and reason with preferences.\n",
+ "The agent prefers states with a higher _utility_.\n",
+ "While constructing multi-agent systems, one major element in the design is the mechanism the agents use for making decisions about which actions to adopt in order to achieve their goals.\n",
+ "What is usually required is a mechanism which ensures that the actions adopted lead to benefits for both individual agents, and the community of which they are part.\n",
+ "The utility of a state is _relative_ to an agent.\n",
+ " \n",
+ "Preferences, as expressed by utilities, are combined with probabilities in the general theory of rational decisions called __decision theory__.\n",
+ " \n",
+ "An agent is said to be _rational_ if and only if it chooses the action that yields the highest expected utility, averaged over all the possible outcomes of the action."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Here we'll see how a decision-theoretic agent is implemented in the module."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 97,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ " Codestin Search App \n",
+ " \n",
+ " \n",
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "def DTAgentProgram ( belief_state ): \n",
+ " """A decision-theoretic agent. [Figure 13.1]""" \n",
+ " def program ( percept ): \n",
+ " belief_state . observe ( program . action , percept ) \n",
+ " program . action = argmax ( belief_state . actions (), \n",
+ " key = belief_state . expected_outcome_utility ) \n",
+ " return program . action \n",
+ " program . action = None \n",
+ " return program \n",
+ " \n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "psource(DTAgentProgram)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The `DTAgentProgram` function is pretty self-explanatory.\n",
+ " \n",
+ "It encapsulates a function `program` that takes in an observation or a `percept`, updates its `belief_state` and returns the action that maximizes the `expected_outcome_utility`."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## INFORMATION GATHERING AGENT\n",
+ "Before we discuss what an information gathering agent is, we'll need to know what decision networks are.\n",
+ "For an agent in an environment, a decision network represents information about the agent's current state, its possible actions, the state that will result from the agent's action, and the utility of that state.\n",
+ "Decision networks have three primary kinds of nodes which are:\n",
+ "1. __Chance nodes__: These represent random variables, just like in Bayesian networks.\n",
+ "2. __Decision nodes__: These represent points where the decision-makes has a choice between different actions and the decision maker tries to find the optimal decision at these nodes with regard to the cost, safety and resulting utility.\n",
+ "3. __Utility nodes__: These represent the agent's utility function.\n",
+ "A description of the agent's utility as a function is associated with a utility node.\n",
+ " \n",
+ " \n",
+ "To evaluate a decision network, we do the following:\n",
+ "1. Initialize the evidence variables according to the current state.\n",
+ "2. Calculate posterior probabilities for each possible value of the decision node and calculate the utility resulting from that action.\n",
+ "3. Return the action with the highest utility.\n",
+ " \n",
+ "Let's have a look at the implementation of the `DecisionNetwork` class."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 98,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ " Codestin Search App \n",
+ " \n",
+ " \n",
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "class DecisionNetwork ( BayesNet ): \n",
+ " """An abstract class for a decision network as a wrapper for a BayesNet. \n",
+ " Represents an agent's current state, its possible actions, reachable states \n",
+ " and utilities of those states.""" \n",
+ "\n",
+ " def __init__ ( self , action , infer ): \n",
+ " """action: a single action node \n",
+ " infer: the preferred method to carry out inference on the given BayesNet""" \n",
+ " super ( DecisionNetwork , self ) . __init__ () \n",
+ " self . action = action \n",
+ " self . infer = infer \n",
+ "\n",
+ " def best_action ( self ): \n",
+ " """Return the best action in the network""" \n",
+ " return self . action \n",
+ "\n",
+ " def get_utility ( self , action , state ): \n",
+ " """Return the utility for a particular action and state in the network""" \n",
+ " raise NotImplementedError \n",
+ "\n",
+ " def get_expected_utility ( self , action , evidence ): \n",
+ " """Compute the expected utility given an action and evidence""" \n",
+ " u = 0.0 \n",
+ " prob_dist = self . infer ( action , evidence , self ) . prob \n",
+ " for item , _ in prob_dist . items (): \n",
+ " u += prob_dist [ item ] * self . get_utility ( action , item ) \n",
+ "\n",
+ " return u \n",
+ " \n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "psource(DecisionNetwork)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The `DecisionNetwork` class inherits from `BayesNet` and has a few extra helper methods.\n",
+ " \n",
+ "`best_action` returns the best action in the network.\n",
+ " \n",
+ "`get_utility` is an abstract method which is supposed to return the utility of a particular action and state in the network.\n",
+ " \n",
+ "`get_expected_utility` computes the expected utility, given an action and evidence.\n",
+ " "
+ ]
+ },
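+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As a quick illustration, here is a hypothetical subclass with a single decision (whether to take an umbrella) and a stand-in inference function. Both the utilities and the probabilities in `toy_infer` are invented for this sketch."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# A hypothetical decision network whose only chance variable is 'Rain'\n",
+ "class UmbrellaNetwork(DecisionNetwork):\n",
+ "    def get_utility(self, action, state):\n",
+ "        # state is the value of the 'Rain' chance variable (True/False)\n",
+ "        utilities = {('take', True): 70, ('take', False): 95,\n",
+ "                     ('leave', True): 0, ('leave', False): 100}\n",
+ "        return utilities[(action, state)]\n",
+ "\n",
+ "def toy_infer(action, evidence, network):\n",
+ "    # stand-in for a real inference procedure: P(Rain=True) is fixed at 0.3\n",
+ "    return ProbDist('Rain', {True: 0.3, False: 0.7})\n",
+ "\n",
+ "umbrella_net = UmbrellaNetwork('Umbrella', toy_infer)\n",
+ "# 0.3*70 + 0.7*95 = 87.5 versus 0.3*0 + 0.7*100 = 70.0\n",
+ "umbrella_net.get_expected_utility('take', []), umbrella_net.get_expected_utility('leave', [])"
+ ]
+ },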
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Before we proceed, we need to know a few more terms.\n",
+ " \n",
+ "Having __perfect information__ refers to a state of being fully aware of the current state, the cost functions and the outcomes of actions.\n",
+ "This in turn allows an agent to find the exact utility value of each state.\n",
+ "If an agent has perfect information about the environment, maximum expected utility calculations are exact and can be computed with absolute certainty.\n",
+ " \n",
+ "In decision theory, the __value of perfect information__ (VPI) is the price that an agent would be willing to pay in order to gain access to _perfect information_.\n",
+ "VPI calculations are extensively used to calculate expected utilities for nodes in a decision network.\n",
+ " \n",
+ "For a random variable $E_j$ whose value is currently unknown, the value of discovering $E_j$, given current information $e$ must average over all possible values $e_{jk}$ that we might discover for $E_j$, using our _current_ beliefs about its value.\n",
+ "The VPI of $E_j$ is then given by:\n",
+ " \n",
+ " \n",
+ "$$VPI_e(E_j) = \\left(\\sum_{k}P(E_j=e_{jk}\\ |\\ e) EU(\\alpha_{e_{jk}}\\ |\\ e, E_j=e_{jk})\\right) - EU(\\alpha\\ |\\ e)$$\n",
+ " \n",
+ "VPI is _non-negative_, _non-additive_ and _order-indepentent_."
+ ]
+ },
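+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As a tiny numerical illustration of the formula, suppose a test $E_j$ has two outcomes; all the numbers below are invented."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Invented numbers, purely to illustrate the VPI formula above\n",
+ "p_positive, p_negative = 0.4, 0.6  # P(E_j = e_jk | e) for the two outcomes\n",
+ "eu_if_positive = 90   # EU of the best plan once we know E_j is positive\n",
+ "eu_if_negative = 55   # EU of the best plan once we know E_j is negative\n",
+ "eu_current_best = 60  # EU(alpha | e), before observing E_j\n",
+ "\n",
+ "vpi = (p_positive * eu_if_positive + p_negative * eu_if_negative) - eu_current_best\n",
+ "vpi  # 0.4*90 + 0.6*55 - 60 = 9.0"
+ ]
+ },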
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "An information gathering agent is an agent with certain properties that explores decision networks as and when required with heuristics driven by VPI calculations of nodes.\n",
+ "A sensible agent should ask questions in a reasonable order, should avoid asking irrelevant questions, should take into account the importance of each piece of information in relation to its cost and should stop asking questions when that is appropriate.\n",
+ "_VPI_ is used as the primary heuristic to consider all these points in an information gathering agent as the agent ultimately wants to maximize the utility and needs to find the optimal cost and extent of finding the required information.\n",
+ " \n",
+ "As an overview, an information gathering agent works by repeatedly selecting the observations with the highest information value, until the cost of the next observation is greater than its expected benefit.\n",
+ " \n",
+ "The `InformationGatheringAgent` class is an abstract class that inherits from `Agent` and works on the principles discussed above.\n",
+ "Let's have a look.\n",
+ " "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 99,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ " Codestin Search App \n",
+ " \n",
+ " \n",
+ "\n",
+ "\n",
+ " \n",
+ "\n",
+ "class InformationGatheringAgent ( Agent ): \n",
+ " """A simple information gathering agent. The agent works by repeatedly selecting \n",
+ " the observation with the highest information value, until the cost of the next \n",
+ " observation is greater than its expected benefit. [Figure 16.9]""" \n",
+ "\n",
+ " def __init__ ( self , decnet , infer , initial_evidence = None ): \n",
+ " """decnet: a decision network \n",
+ " infer: the preferred method to carry out inference on the given decision network \n",
+ " initial_evidence: initial evidence""" \n",
+ " self . decnet = decnet \n",
+ " self . infer = infer \n",
+ " self . observation = initial_evidence or [] \n",
+ " self . variables = self . decnet . nodes \n",
+ "\n",
+ " def integrate_percept ( self , percept ): \n",
+ " """Integrate the given percept into the decision network""" \n",
+ " raise NotImplementedError \n",
+ "\n",
+ " def execute ( self , percept ): \n",
+ " """Execute the information gathering algorithm""" \n",
+ " self . observation = self . integrate_percept ( percept ) \n",
+ " vpis = self . vpi_cost_ratio ( self . variables ) \n",
+ " j = argmax ( vpis ) \n",
+ " variable = self . variables [ j ] \n",
+ "\n",
+ " if self . vpi ( variable ) > self . cost ( variable ): \n",
+ " return self . request ( variable ) \n",
+ "\n",
+ " return self . decnet . best_action () \n",
+ "\n",
+ " def request ( self , variable ): \n",
+ " """Return the value of the given random variable as the next percept""" \n",
+ " raise NotImplementedError \n",
+ "\n",
+ " def cost ( self , var ): \n",
+ " """Return the cost of obtaining evidence through tests, consultants or questions""" \n",
+ " raise NotImplementedError \n",
+ "\n",
+ " def vpi_cost_ratio ( self , variables ): \n",
+ " """Return the VPI to cost ratio for the given variables""" \n",
+ " v_by_c = [] \n",
+ " for var in variables : \n",
+ " v_by_c . append ( self . vpi ( var ) / self . cost ( var )) \n",
+ " return v_by_c \n",
+ "\n",
+ " def vpi ( self , variable ): \n",
+ " """Return VPI for a given variable""" \n",
+ " vpi = 0.0 \n",
+ " prob_dist = self . infer ( variable , self . observation , self . decnet ) . prob \n",
+ " for item , _ in prob_dist . items (): \n",
+ " post_prob = prob_dist [ item ] \n",
+ " new_observation = list ( self . observation ) \n",
+ " new_observation . append ( item ) \n",
+ " expected_utility = self . decnet . get_expected_utility ( variable , new_observation ) \n",
+ " vpi += post_prob * expected_utility \n",
+ "\n",
+ " vpi -= self . decnet . get_expected_utility ( variable , self . observation ) \n",
+ " return vpi \n",
+ " \n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "psource(InformationGatheringAgent)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The `cost` method is an abstract method that returns the cost of obtaining the evidence through tests, consultants, questions or any other means.\n",
+ " \n",
+ "The `request` method returns the value of the given random variable as the next percept.\n",
+ " \n",
+ "The `vpi_cost_ratio` method returns a list of VPI divided by cost for each variable in the `variables` list provided to it.\n",
+ " \n",
+ "The `vpi` method calculates the VPI for a given variable\n",
+ " \n",
+ "And finally, the `execute` method executes the general information gathering algorithm, as described in __figure 16.9__ in the book.\n",
+ " \n",
+ "Our agent implements a form of information gathering that is called __myopic__ as the VPI formula is used shortsightedly here.\n",
+ "It calculates the value of information as if only a single evidence variable will be acquired.\n",
+ "This is similar to greedy search, where we do not look at the bigger picture and aim for local optimizations to hopefully reach the global optimum.\n",
+ "This often works well in practice but a myopic agent might hastily take an action when it would have been better to request more variables before taking an action.\n",
+ "A _conditional plan_, on the other hand might work better for some scenarios.\n",
+ " \n"
+ ]
+ },
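+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "To make this concrete, a minimal hypothetical subclass might fill in the abstract methods as below; the fixed observation cost and the trivial `integrate_percept` are assumptions made only for the sketch."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# A hypothetical concrete agent: every observation has the same fixed cost\n",
+ "class FixedCostGatheringAgent(InformationGatheringAgent):\n",
+ "    def integrate_percept(self, percept):\n",
+ "        # simply append the new percept to the running list of observations\n",
+ "        return self.observation + [percept]\n",
+ "\n",
+ "    def request(self, variable):\n",
+ "        return variable  # ask the environment for this variable next\n",
+ "\n",
+ "    def cost(self, var):\n",
+ "        return 1.0\n",
+ "\n",
+ "# execute() would now request the variable whose VPI exceeds this fixed cost,\n",
+ "# and fall back to decnet.best_action() otherwise."
+ ]
+ },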
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "With this we conclude this notebook."
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.6.9"
}
},
"nbformat": 4,
- "nbformat_minor": 0
+ "nbformat_minor": 2
}
diff --git a/probability.py b/probability.py
index e102e4dd8..e1e77d224 100644
--- a/probability.py
+++ b/probability.py
@@ -1,30 +1,27 @@
-"""Probability models. (Chapter 13-15)
-"""
+"""Probability models (Chapter 13-15)"""
-from utils import (
- product, argmax, element_wise_product, matrix_multiplication,
- vector_to_diagonal, vector_add, scalar_vector_product, inverse_matrix,
- weighted_sample_with_replacement, isclose, probability, normalize
-)
-from logic import extend
-
-import random
from collections import defaultdict
from functools import reduce
-# ______________________________________________________________________________
+from agents import Agent
+from utils import *
def DTAgentProgram(belief_state):
- """A decision-theoretic agent. [Figure 13.1]"""
+ """
+ [Figure 13.1]
+ A decision-theoretic agent.
+ """
+
def program(percept):
belief_state.observe(program.action, percept)
- program.action = argmax(belief_state.actions(),
- key=belief_state.expected_outcome_utility)
+ program.action = max(belief_state.actions(), key=belief_state.expected_outcome_utility)
return program.action
+
program.action = None
return program
+
# ______________________________________________________________________________
@@ -38,14 +35,14 @@ class ProbDist:
(0.125, 0.375, 0.5)
"""
- def __init__(self, varname='?', freqs=None):
- """If freqs is given, it is a dictionary of values - frequency pairs,
+ def __init__(self, var_name='?', freq=None):
+ """If freq is given, it is a dictionary of values - frequency pairs,
then ProbDist is normalized."""
self.prob = {}
- self.varname = varname
+ self.var_name = var_name
self.values = []
- if freqs:
- for (v, p) in freqs.items():
+ if freq:
+ for (v, p) in freq.items():
self[v] = p
self.normalize()
@@ -67,19 +64,18 @@ def normalize(self):
Returns the normalized distribution.
Raises a ZeroDivisionError if the sum of the values is 0."""
total = sum(self.prob.values())
- if not isclose(total, 1.0):
+ if not np.isclose(total, 1.0):
for val in self.prob:
self.prob[val] /= total
return self
- def show_approx(self, numfmt='%.3g'):
+ def show_approx(self, numfmt='{:.3g}'):
"""Show the probabilities rounded and sorted by key, for the
sake of portable doctests."""
- return ', '.join([('%s: ' + numfmt) % (v, p)
- for (v, p) in sorted(self.prob.items())])
+ return ', '.join([('{}: ' + numfmt).format(v, p) for (v, p) in sorted(self.prob.items())])
def __repr__(self):
- return "P({})".format(self.varname)
+ return "P({})".format(self.var_name)
class JointProbDist(ProbDist):
@@ -102,7 +98,7 @@ def __getitem__(self, values):
return ProbDist.__getitem__(self, values)
def __setitem__(self, values, p):
- """Set P(values) = p. Values can be a tuple or a dict; it must
+ """Set P(values) = p. Values can be a tuple or a dict; it must
have a value for each of the variables in the joint. Also keep track
of the values we have seen so far for each variable."""
values = event_values(values, self.variables)
@@ -131,12 +127,15 @@ def event_values(event, variables):
else:
return tuple([event[var] for var in variables])
+
# ______________________________________________________________________________
def enumerate_joint_ask(X, e, P):
- """Return a probability distribution over the values of the variable X,
- given the {var:val} observations e, in the JointProbDist P. [Section 13.3]
+ """
+ [Section 13.3]
+ Return a probability distribution over the values of the variable X,
+ given the {var:val} observations e, in the JointProbDist P.
>>> P = JointProbDist(['X', 'Y'])
>>> P[0,0] = 0.25; P[0,1] = 0.5; P[1,1] = P[2,1] = 0.125
>>> enumerate_joint_ask('X', dict(Y=1), P).show_approx()
@@ -156,8 +155,8 @@ def enumerate_joint(variables, e, P):
if not variables:
return P[e]
Y, rest = variables[0], variables[1:]
- return sum([enumerate_joint(rest, extend(e, Y, y), P)
- for y in P.values(Y)])
+ return sum([enumerate_joint(rest, extend(e, Y, y), P) for y in P.values(Y)])
+
# ______________________________________________________________________________
@@ -165,10 +164,11 @@ def enumerate_joint(variables, e, P):
class BayesNet:
"""Bayesian network containing only boolean-variable nodes."""
- def __init__(self, node_specs=[]):
+ def __init__(self, node_specs=None):
"""Nodes must be ordered with parents before children."""
self.nodes = []
self.variables = []
+ node_specs = node_specs or []
for node_spec in node_specs:
self.add(node_spec)
@@ -200,13 +200,105 @@ def __repr__(self):
return 'BayesNet({0!r})'.format(self.nodes)
+class DecisionNetwork(BayesNet):
+ """An abstract class for a decision network as a wrapper for a BayesNet.
+ Represents an agent's current state, its possible actions, reachable states
+ and utilities of those states."""
+
+ def __init__(self, action, infer):
+ """action: a single action node
+ infer: the preferred method to carry out inference on the given BayesNet"""
+ super(DecisionNetwork, self).__init__()
+ self.action = action
+ self.infer = infer
+
+ def best_action(self):
+ """Return the best action in the network"""
+ return self.action
+
+ def get_utility(self, action, state):
+ """Return the utility for a particular action and state in the network"""
+ raise NotImplementedError
+
+ def get_expected_utility(self, action, evidence):
+ """Compute the expected utility given an action and evidence"""
+ u = 0.0
+ prob_dist = self.infer(action, evidence, self).prob
+ for item, _ in prob_dist.items():
+ u += prob_dist[item] * self.get_utility(action, item)
+
+ return u
+
+
+class InformationGatheringAgent(Agent):
+ """
+ [Figure 16.9]
+ A simple information gathering agent. The agent works by repeatedly selecting
+ the observation with the highest information value, until the cost of the next
+ observation is greater than its expected benefit."""
+
+ def __init__(self, decnet, infer, initial_evidence=None):
+ """decnet: a decision network
+ infer: the preferred method to carry out inference on the given decision network
+ initial_evidence: initial evidence"""
+ self.decnet = decnet
+ self.infer = infer
+ self.observation = initial_evidence or []
+ self.variables = self.decnet.nodes
+
+ def integrate_percept(self, percept):
+ """Integrate the given percept into the decision network"""
+ raise NotImplementedError
+
+ def execute(self, percept):
+ """Execute the information gathering algorithm"""
+ self.observation = self.integrate_percept(percept)
+ vpis = self.vpi_cost_ratio(self.variables)
+ j = vpis.index(max(vpis))
+ variable = self.variables[j]
+
+ if self.vpi(variable) > self.cost(variable):
+ return self.request(variable)
+
+ return self.decnet.best_action()
+
+ def request(self, variable):
+ """Return the value of the given random variable as the next percept"""
+ raise NotImplementedError
+
+ def cost(self, var):
+ """Return the cost of obtaining evidence through tests, consultants or questions"""
+ raise NotImplementedError
+
+ def vpi_cost_ratio(self, variables):
+ """Return the VPI to cost ratio for the given variables"""
+ v_by_c = []
+ for var in variables:
+ v_by_c.append(self.vpi(var) / self.cost(var))
+ return v_by_c
+
+ def vpi(self, variable):
+ """Return VPI for a given variable"""
+ vpi = 0.0
+ prob_dist = self.infer(variable, self.observation, self.decnet).prob
+ for item, _ in prob_dist.items():
+ post_prob = prob_dist[item]
+ new_observation = list(self.observation)
+ new_observation.append(item)
+ expected_utility = self.decnet.get_expected_utility(variable, new_observation)
+ vpi += post_prob * expected_utility
+
+ vpi -= self.decnet.get_expected_utility(variable, self.observation)
+ return vpi
+
+
class BayesNode:
"""A conditional probability distribution for a boolean variable,
P(X | parents). Part of a BayesNet."""
def __init__(self, X, parents, cpt):
"""X is a variable name, and parents a sequence of variable
- names or a space-separated string. cpt, the conditional
+ names or a space-separated string. cpt, the conditional
probability table, takes one of these forms:
* A number, the unconditional probability P(X=true). You can
@@ -277,21 +369,22 @@ def __repr__(self):
T, F = True, False
-burglary = BayesNet([
- ('Burglary', '', 0.001),
- ('Earthquake', '', 0.002),
- ('Alarm', 'Burglary Earthquake',
- {(T, T): 0.95, (T, F): 0.94, (F, T): 0.29, (F, F): 0.001}),
- ('JohnCalls', 'Alarm', {T: 0.90, F: 0.05}),
- ('MaryCalls', 'Alarm', {T: 0.70, F: 0.01})
-])
+burglary = BayesNet([('Burglary', '', 0.001),
+ ('Earthquake', '', 0.002),
+ ('Alarm', 'Burglary Earthquake',
+ {(T, T): 0.95, (T, F): 0.94, (F, T): 0.29, (F, F): 0.001}),
+ ('JohnCalls', 'Alarm', {T: 0.90, F: 0.05}),
+ ('MaryCalls', 'Alarm', {T: 0.70, F: 0.01})])
+
# ______________________________________________________________________________
def enumeration_ask(X, e, bn):
- """Return the conditional probability distribution of variable X
- given evidence e, from BayesNet bn. [Figure 14.9]
+ """
+ [Figure 14.9]
+ Return the conditional probability distribution of variable X
+ given evidence e, from BayesNet bn.
>>> enumeration_ask('Burglary', dict(JohnCalls=T, MaryCalls=T), burglary
... ).show_approx()
'False: 0.716, True: 0.284'"""
@@ -317,11 +410,14 @@ def enumerate_all(variables, e, bn):
return sum(Ynode.p(y, e) * enumerate_all(rest, extend(e, Y, y), bn)
for y in bn.variable_values(Y))
+
# ______________________________________________________________________________
def elimination_ask(X, e, bn):
- """Compute bn's P(X|e) by variable elimination. [Figure 14.11]
+ """
+ [Figure 14.11]
+ Compute bn's P(X|e) by variable elimination.
>>> elimination_ask('Burglary', dict(JohnCalls=T, MaryCalls=T), burglary
... ).show_approx()
'False: 0.716, True: 0.284'"""
@@ -373,23 +469,20 @@ def __init__(self, variables, cpt):
def pointwise_product(self, other, bn):
"""Multiply two factors, combining their variables."""
variables = list(set(self.variables) | set(other.variables))
- cpt = {event_values(e, variables): self.p(e) * other.p(e)
- for e in all_events(variables, bn, {})}
+ cpt = {event_values(e, variables): self.p(e) * other.p(e) for e in all_events(variables, bn, {})}
return Factor(variables, cpt)
def sum_out(self, var, bn):
"""Make a factor eliminating var by summing over its values."""
variables = [X for X in self.variables if X != var]
- cpt = {event_values(e, variables): sum(self.p(extend(e, var, val))
- for val in bn.variable_values(var))
+ cpt = {event_values(e, variables): sum(self.p(extend(e, var, val)) for val in bn.variable_values(var))
for e in all_events(variables, bn, {})}
return Factor(variables, cpt)
def normalize(self):
"""Return my probabilities; must be down to one variable."""
assert len(self.variables) == 1
- return ProbDist(self.variables[0],
- {k: v for ((k,), v) in self.cpt.items()})
+ return ProbDist(self.variables[0], {k: v for ((k,), v) in self.cpt.items()})
def p(self, e):
"""Look up my value tabulated for e."""
@@ -406,35 +499,42 @@ def all_events(variables, bn, e):
for x in bn.variable_values(X):
yield extend(e1, X, x)
+
# ______________________________________________________________________________
# [Figure 14.12a]: sprinkler network
-sprinkler = BayesNet([
- ('Cloudy', '', 0.5),
- ('Sprinkler', 'Cloudy', {T: 0.10, F: 0.50}),
- ('Rain', 'Cloudy', {T: 0.80, F: 0.20}),
- ('WetGrass', 'Sprinkler Rain',
- {(T, T): 0.99, (T, F): 0.90, (F, T): 0.90, (F, F): 0.00})])
+sprinkler = BayesNet([('Cloudy', '', 0.5),
+ ('Sprinkler', 'Cloudy', {T: 0.10, F: 0.50}),
+ ('Rain', 'Cloudy', {T: 0.80, F: 0.20}),
+ ('WetGrass', 'Sprinkler Rain',
+ {(T, T): 0.99, (T, F): 0.90, (F, T): 0.90, (F, F): 0.00})])
+
# ______________________________________________________________________________
def prior_sample(bn):
- """Randomly sample from bn's full joint distribution. The result
- is a {variable: value} dict. [Figure 14.13]"""
+ """
+ [Figure 14.13]
+ Randomly sample from bn's full joint distribution.
+ The result is a {variable: value} dict.
+ """
event = {}
for node in bn.nodes:
event[node.variable] = node.sample(event)
return event
+
# _________________________________________________________________________
-def rejection_sampling(X, e, bn, N):
- """Estimate the probability distribution of variable X given
- evidence e in BayesNet bn, using N samples. [Figure 14.14]
+def rejection_sampling(X, e, bn, N=10000):
+ """
+ [Figure 14.14]
+ Estimate the probability distribution of variable X given
+ evidence e in BayesNet bn, using N samples.
Raises a ZeroDivisionError if all the N samples are rejected,
i.e., inconsistent with e.
>>> random.seed(47)
@@ -452,15 +552,17 @@ def rejection_sampling(X, e, bn, N):
def consistent_with(event, evidence):
"""Is event consistent with the given evidence?"""
- return all(evidence.get(k, v) == v
- for k, v in event.items())
+ return all(evidence.get(k, v) == v for k, v in event.items())
+
# _________________________________________________________________________
-def likelihood_weighting(X, e, bn, N):
- """Estimate the probability distribution of variable X given
- evidence e in BayesNet bn. [Figure 14.15]
+def likelihood_weighting(X, e, bn, N=10000):
+ """
+ [Figure 14.15]
+ Estimate the probability distribution of variable X given
+ evidence e in BayesNet bn.
>>> random.seed(1017)
>>> likelihood_weighting('Burglary', dict(JohnCalls=T, MaryCalls=T),
... burglary, 10000).show_approx()
@@ -474,9 +576,11 @@ def likelihood_weighting(X, e, bn, N):
def weighted_sample(bn, e):
- """Sample an event from bn that's consistent with the evidence e;
+ """
+ Sample an event from bn that's consistent with the evidence e;
return the event and its weight, the likelihood that the event
- accords to the evidence."""
+ accords to the evidence.
+ """
w = 1
event = dict(e) # boldface x in [Figure 14.15]
for node in bn.nodes:
@@ -487,10 +591,11 @@ def weighted_sample(bn, e):
event[Xi] = node.sample(event)
return event, w
+
# _________________________________________________________________________
-def gibbs_ask(X, e, bn, N):
+def gibbs_ask(X, e, bn, N=1000):
"""[Figure 14.16]"""
assert X not in e, "Query variable must be distinct from evidence"
counts = {x: 0 for x in bn.variable_values(X)} # bold N in [Figure 14.16]
@@ -514,22 +619,22 @@ def markov_blanket_sample(X, e, bn):
Q = ProbDist(X)
for xi in bn.variable_values(X):
ei = extend(e, X, xi)
- # [Equation 14.12:]
- Q[xi] = Xnode.p(xi, e) * product(Yj.p(ei[Yj.variable], ei)
- for Yj in Xnode.children)
+ # [Equation 14.12]
+ Q[xi] = Xnode.p(xi, e) * product(Yj.p(ei[Yj.variable], ei) for Yj in Xnode.children)
# (assuming a Boolean variable here)
return probability(Q.normalize()[True])
+
# _________________________________________________________________________
class HiddenMarkovModel:
"""A Hidden markov model which takes Transition model and Sensor model as inputs"""
- def __init__(self, transition_model, sensor_model, prior=[0.5, 0.5]):
+ def __init__(self, transition_model, sensor_model, prior=None):
self.transition_model = transition_model
self.sensor_model = sensor_model
- self.prior = prior
+ self.prior = prior or [0.5, 0.5]
def sensor_dist(self, ev):
if ev is True:
@@ -554,52 +659,95 @@ def backward(HMM, b, ev):
scalar_vector_product(prediction[1], HMM.transition_model[1])))
-def forward_backward(HMM, ev, prior):
- """[Figure 15.4]
+def forward_backward(HMM, ev):
+ """
+ [Figure 15.4]
Forward-Backward algorithm for smoothing. Computes posterior probabilities
- of a sequence of states given a sequence of observations."""
+ of a sequence of states given a sequence of observations.
+ """
t = len(ev)
ev.insert(0, None) # to make the code look similar to pseudo code
- fv = [[0.0, 0.0] for i in range(len(ev))]
+ fv = [[0.0, 0.0] for _ in range(len(ev))]
b = [1.0, 1.0]
- bv = [b] # we don't need bv; but we will have a list of all backward messages here
- sv = [[0, 0] for i in range(len(ev))]
+ sv = [[0, 0] for _ in range(len(ev))]
- fv[0] = prior
+ fv[0] = HMM.prior
for i in range(1, t + 1):
fv[i] = forward(HMM, fv[i - 1], ev[i])
for i in range(t, -1, -1):
sv[i - 1] = normalize(element_wise_product(fv[i], b))
b = backward(HMM, b, ev[i])
- bv.append(b)
sv = sv[::-1]
return sv
+
+def viterbi(HMM, ev):
+ """
+ [Equation 15.11]
+ Viterbi algorithm to find the most likely sequence. Computes the best path and the
+ corresponding probabilities, given an HMM model and a sequence of observations.
+ """
+ t = len(ev)
+ ev = ev.copy()
+ ev.insert(0, None)
+
+ m = [[0.0, 0.0] for _ in range(len(ev) - 1)]
+
+ # the recursion is initialized with m1 = forward(P(X0), e1)
+ m[0] = forward(HMM, HMM.prior, ev[1])
+ # keep track of maximizing predecessors
+ backtracking_graph = []
+
+ for i in range(1, t):
+ m[i] = element_wise_product(HMM.sensor_dist(ev[i + 1]),
+ [max(element_wise_product(HMM.transition_model[0], m[i - 1])),
+ max(element_wise_product(HMM.transition_model[1], m[i - 1]))])
+ backtracking_graph.append([np.argmax(element_wise_product(HMM.transition_model[0], m[i - 1])),
+ np.argmax(element_wise_product(HMM.transition_model[1], m[i - 1]))])
+
+ # computed probabilities
+ ml_probabilities = [0.0] * (len(ev) - 1)
+ # most likely sequence
+ ml_path = [True] * (len(ev) - 1)
+
+ # the construction of the most likely sequence starts in the final state with the largest probability, and
+ # runs backwards; the algorithm needs to store for each xt its predecessor xt-1 maximizing its probability
+ i_max = np.argmax(m[-1])
+
+ for i in range(t - 1, -1, -1):
+ ml_probabilities[i] = m[i][i_max]
+ ml_path[i] = True if i_max == 0 else False
+ if i > 0:
+ i_max = backtracking_graph[i - 1][i_max]
+
+ return ml_path, ml_probabilities
+
+
# _________________________________________________________________________
def fixed_lag_smoothing(e_t, HMM, d, ev, t):
- """[Figure 15.6]
+ """
+ [Figure 15.6]
Smoothing algorithm with a fixed time lag of 'd' steps.
Online algorithm that outputs the new smoothed estimate if observation
- for new time step is given."""
+ for new time step is given.
+ """
ev.insert(0, None)
T_model = HMM.transition_model
f = HMM.prior
B = [[1, 0], [0, 1]]
- evidence = []
- evidence.append(e_t)
- O_t = vector_to_diagonal(HMM.sensor_dist(e_t))
+ O_t = np.diag(HMM.sensor_dist(e_t))
if t > d:
f = forward(HMM, f, e_t)
- O_tmd = vector_to_diagonal(HMM.sensor_dist(ev[t - d]))
- B = matrix_multiplication(inverse_matrix(O_tmd), inverse_matrix(T_model), B, T_model, O_t)
+ O_tmd = np.diag(HMM.sensor_dist(ev[t - d]))
+ B = matrix_multiplication(np.linalg.inv(O_tmd), np.linalg.inv(T_model), B, T_model, O_t)
else:
B = matrix_multiplication(B, T_model, O_t)
t += 1
@@ -610,6 +758,7 @@ def fixed_lag_smoothing(e_t, HMM, d, ev, t):
else:
return None
+
# _________________________________________________________________________
@@ -645,7 +794,77 @@ def particle_filtering(e, N, HMM):
w[i] = float("{0:.4f}".format(w[i]))
# STEP 2
-
s = weighted_sample_with_replacement(N, s, w)
return s
+
+
+# _________________________________________________________________________
+# TODO: Implement continuous map for MonteCarlo similar to Fig25.10 from the book
+
+
+class MCLmap:
+ """Map which provides probability distributions and sensor readings.
+ Consists of discrete cells which are either an obstacle or empty"""
+
+ def __init__(self, m):
+ self.m = m
+ self.nrows = len(m)
+ self.ncols = len(m[0])
+ # list of empty spaces in the map
+ self.empty = [(i, j) for i in range(self.nrows) for j in range(self.ncols) if not m[i][j]]
+
+ def sample(self):
+ """Returns a random kinematic state possible in the map"""
+ pos = random.choice(self.empty)
+ # 0N 1E 2S 3W
+ orient = random.choice(range(4))
+ kin_state = pos + (orient,)
+ return kin_state
+
+ def ray_cast(self, sensor_num, kin_state):
+ """Returns distance to nearest obstacle or map boundary in the direction of sensor"""
+ pos = kin_state[:2]
+ orient = kin_state[2]
+ # sensor layout when orientation is 0 (towards North)
+ # 0
+ # 3R1
+ # 2
+ delta = ((sensor_num % 2 == 0) * (sensor_num - 1), (sensor_num % 2 == 1) * (2 - sensor_num))
+ # sensor direction changes based on orientation
+ for _ in range(orient):
+ delta = (delta[1], -delta[0])
+ range_count = 0
+ while 0 <= pos[0] < self.nrows and 0 <= pos[1] < self.ncols and not self.m[pos[0]][pos[1]]:
+ pos = vector_add(pos, delta)
+ range_count += 1
+ return range_count
+
+
+def monte_carlo_localization(a, z, N, P_motion_sample, P_sensor, m, S=None):
+ """
+ [Figure 25.9]
+ Monte Carlo localization algorithm
+ """
+
+ def ray_cast(sensor_num, kin_state, m):
+ return m.ray_cast(sensor_num, kin_state)
+
+ M = len(z)
+ S_ = [0] * N
+ W_ = [0] * N
+ v = a['v']
+ w = a['w']
+
+ if S is None:
+ S = [m.sample() for _ in range(N)]
+
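+ # motion update: sample a new pose for each particle, then weight it with the range-sensor model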
+ for i in range(N):
+ S_[i] = P_motion_sample(S[i], v, w)
+ W_[i] = 1
+ for j in range(M):
+ z_ = ray_cast(j, S_[i], m)
+ W_[i] = W_[i] * P_sensor(z[j], z_)
+
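+ # resampling: draw N new particles in proportion to their weights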
+ S = weighted_sample_with_replacement(N, S_, W_)
+ return S
diff --git a/probability-4e.ipynb b/probability4e.ipynb
similarity index 100%
rename from probability-4e.ipynb
rename to probability4e.ipynb
diff --git a/probability4e.py b/probability4e.py
new file mode 100644
index 000000000..d413a55ae
--- /dev/null
+++ b/probability4e.py
@@ -0,0 +1,776 @@
+"""Probability models (Chapter 12-13)"""
+
+import copy
+import random
+from collections import defaultdict
+from functools import reduce
+
+import numpy as np
+
+from utils4e import product, probability, extend
+
+
+# ______________________________________________________________________________
+# Chapter 12 Quantifying Uncertainty
+# 12.1 Acting Under Uncertainty
+
+
+def DTAgentProgram(belief_state):
+ """A decision-theoretic agent. [Figure 12.1]"""
+
+ def program(percept):
+ belief_state.observe(program.action, percept)
+ program.action = max(belief_state.actions(), key=belief_state.expected_outcome_utility)
+ return program.action
+
+ program.action = None
+ return program
+
+
+# ______________________________________________________________________________
+# 12.2 Basic Probability Notation
+
+
+class ProbDist:
+ """A discrete probability distribution. You name the random variable
+ in the constructor, then assign and query probability of values.
+ >>> P = ProbDist('Flip'); P['H'], P['T'] = 0.25, 0.75; P['H']
+ 0.25
+ >>> P = ProbDist('X', {'lo': 125, 'med': 375, 'hi': 500})
+ >>> P['lo'], P['med'], P['hi']
+ (0.125, 0.375, 0.5)
+ """
+
+ def __init__(self, varname='?', freqs=None):
+ """If freqs is given, it is a dictionary of values - frequency pairs,
+ then ProbDist is normalized."""
+ self.prob = {}
+ self.varname = varname
+ self.values = []
+ if freqs:
+ for (v, p) in freqs.items():
+ self[v] = p
+ self.normalize()
+
+ def __getitem__(self, val):
+ """Given a value, return P(value)."""
+ try:
+ return self.prob[val]
+ except KeyError:
+ return 0
+
+ def __setitem__(self, val, p):
+ """Set P(val) = p."""
+ if val not in self.values:
+ self.values.append(val)
+ self.prob[val] = p
+
+ def normalize(self):
+ """Make sure the probabilities of all values sum to 1.
+ Returns the normalized distribution.
+ Raises a ZeroDivisionError if the sum of the values is 0."""
+ total = sum(self.prob.values())
+ if not np.isclose(total, 1.0):
+ for val in self.prob:
+ self.prob[val] /= total
+ return self
+
+ def show_approx(self, numfmt='{:.3g}'):
+ """Show the probabilities rounded and sorted by key, for the
+ sake of portable doctests."""
+ return ', '.join([('{}: ' + numfmt).format(v, p)
+ for (v, p) in sorted(self.prob.items())])
+
+ def __repr__(self):
+ return "P({})".format(self.varname)
+
+
+# ______________________________________________________________________________
+# 12.3 Inference Using Full Joint Distributions
+
+
+class JointProbDist(ProbDist):
+ """A discrete probability distribute over a set of variables.
+ >>> P = JointProbDist(['X', 'Y']); P[1, 1] = 0.25
+ >>> P[1, 1]
+ 0.25
+ >>> P[dict(X=0, Y=1)] = 0.5
+ >>> P[dict(X=0, Y=1)]
+ 0.5"""
+
+ def __init__(self, variables):
+ self.prob = {}
+ self.variables = variables
+ self.vals = defaultdict(list)
+
+ def __getitem__(self, values):
+ """Given a tuple or dict of values, return P(values)."""
+ values = event_values(values, self.variables)
+ return ProbDist.__getitem__(self, values)
+
+ def __setitem__(self, values, p):
+ """Set P(values) = p. Values can be a tuple or a dict; it must
+ have a value for each of the variables in the joint. Also keep track
+ of the values we have seen so far for each variable."""
+ values = event_values(values, self.variables)
+ self.prob[values] = p
+ for var, val in zip(self.variables, values):
+ if val not in self.vals[var]:
+ self.vals[var].append(val)
+
+ def values(self, var):
+ """Return the set of possible values for a variable."""
+ return self.vals[var]
+
+ def __repr__(self):
+ return "P({})".format(self.variables)
+
+
+def event_values(event, variables):
+ """Return a tuple of the values of variables in event.
+ >>> event_values ({'A': 10, 'B': 9, 'C': 8}, ['C', 'A'])
+ (8, 10)
+ >>> event_values ((1, 2), ['C', 'A'])
+ (1, 2)
+ """
+ if isinstance(event, tuple) and len(event) == len(variables):
+ return event
+ else:
+ return tuple([event[var] for var in variables])
+
+
+def enumerate_joint_ask(X, e, P):
+ """Return a probability distribution over the values of the variable X,
+ given the {var:val} observations e, in the JointProbDist P. [Section 12.3]
+ >>> P = JointProbDist(['X', 'Y'])
+ >>> P[0,0] = 0.25; P[0,1] = 0.5; P[1,1] = P[2,1] = 0.125
+ >>> enumerate_joint_ask('X', dict(Y=1), P).show_approx()
+ '0: 0.667, 1: 0.167, 2: 0.167'
+ """
+ assert X not in e, "Query variable must be distinct from evidence"
+ Q = ProbDist(X) # probability distribution for X, initially empty
+ Y = [v for v in P.variables if v != X and v not in e] # hidden variables.
+ for xi in P.values(X):
+ Q[xi] = enumerate_joint(Y, extend(e, X, xi), P)
+ return Q.normalize()
+
+
+def enumerate_joint(variables, e, P):
+ """Return the sum of those entries in P consistent with e,
+ provided variables is P's remaining variables (the ones not in e)."""
+ if not variables:
+ return P[e]
+ Y, rest = variables[0], variables[1:]
+ return sum([enumerate_joint(rest, extend(e, Y, y), P)
+ for y in P.values(Y)])
+
+
+# ______________________________________________________________________________
+# 12.4 Independence
+
+
+def is_independent(variables, P):
+ """
+ Return whether a list of variables are independent given their distribution P
+ P is an instance of JointProbDist
+ >>> P = JointProbDist(['X', 'Y'])
+ >>> P[0,0] = 0.25; P[0,1] = 0.5; P[1,1] = P[1,0] = 0.125
+ >>> is_independent(['X', 'Y'], P)
+ False
+ """
+ for var in variables:
+ event_vars = variables[:]
+ event_vars.remove(var)
+ event = {}
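+ # marginal distribution of var (no evidence), to compare against each conditional distribution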
+ distribution = enumerate_joint_ask(var, event, P)
+ events = gen_possible_events(event_vars, P)
+ for e in events:
+ conditional_distr = enumerate_joint_ask(var, e, P)
+ if conditional_distr.prob != distribution.prob:
+ return False
+ return True
+
+
+def gen_possible_events(vars, P):
+ """Generate all possible events of a collection of vars according to distribution of P"""
+ events = []
+
+ def backtrack(vars, P, temp):
+ if not vars:
+ events.append(temp)
+ return
+ var = vars[0]
+ for val in P.values(var):
+ temp[var] = val
+ backtrack([v for v in vars if v != var], P, copy.copy(temp))
+
+ backtrack(vars, P, {})
+ return events
+
+
+# ______________________________________________________________________________
+# Chapter 13 Probabilistic Reasoning
+# 13.1 Representing Knowledge in an Uncertain Domain
+
+
+class BayesNet:
+ """Bayesian network containing only boolean-variable nodes."""
+
+ def __init__(self, node_specs=None):
+ """
+ Nodes must be ordered with parents before children.
+ :param node_specs: a nested iterable object; each element contains (variable name, parent names, cpt)
+ for each node
+ """
+
+ self.nodes = []
+ self.variables = []
+ node_specs = node_specs or []
+ for node_spec in node_specs:
+ self.add(node_spec)
+
+ def add(self, node_spec):
+ """
+ Add a node to the net. Its parents must already be in the
+ net, and its variable must not.
+ Initialize Bayes nodes by detecting the length of input node specs
+ """
+ if len(node_spec) >= 5:
+ node = ContinuousBayesNode(*node_spec)
+ else:
+ node = BayesNode(*node_spec)
+ assert node.variable not in self.variables
+ assert all((parent in self.variables) for parent in node.parents)
+ self.nodes.append(node)
+ self.variables.append(node.variable)
+ for parent in node.parents:
+ self.variable_node(parent).children.append(node)
+
+ def variable_node(self, var):
+ """
+ Return the node for the variable named var.
+ >>> burglary.variable_node('Burglary').variable
+ 'Burglary'
+ """
+ for n in self.nodes:
+ if n.variable == var:
+ return n
+ raise Exception("No such variable: {}".format(var))
+
+ def variable_values(self, var):
+ """Return the domain of var."""
+ return [True, False]
+
+ def __repr__(self):
+ return 'BayesNet({0!r})'.format(self.nodes)
+
+
+class BayesNode:
+ """
+ A conditional probability distribution for a boolean variable,
+ P(X | parents). Part of a BayesNet.
+ """
+
+ def __init__(self, X, parents, cpt):
+ """
+ :param X: variable name,
+ :param parents: a sequence of variable names or a space-separated string, representing the names of the parent nodes
+ :param cpt: the conditional probability table, takes one of these forms:
+
+ * A number, the unconditional probability P(X=true). You can
+ use this form when there are no parents.
+
+ * A dict {v: p, ...}, the conditional probability distribution
+ P(X=true | parent=v) = p. When there's just one parent.
+
+ * A dict {(v1, v2, ...): p, ...}, the distribution P(X=true |
+ parent1=v1, parent2=v2, ...) = p. Each key must have as many
+ values as there are parents. You can use this form always;
+ the first two are just conveniences.
+
+ In all cases the probability of X being false is left implicit,
+ since it follows from P(X=true).
+
+ >>> X = BayesNode('X', '', 0.2)
+ >>> Y = BayesNode('Y', 'P', {T: 0.2, F: 0.7})
+ >>> Z = BayesNode('Z', 'P Q',
+ ... {(T, T): 0.2, (T, F): 0.3, (F, T): 0.5, (F, F): 0.7})
+ """
+ if isinstance(parents, str):
+ parents = parents.split()
+
+ # We store the table always in the third form above.
+ if isinstance(cpt, (float, int)): # no parents, 0-tuple
+ cpt = {(): cpt}
+ elif isinstance(cpt, dict):
+ # one parent, 1-tuple
+ if cpt and isinstance(list(cpt.keys())[0], bool):
+ cpt = {(v,): p for v, p in cpt.items()}
+
+ assert isinstance(cpt, dict)
+ for vs, p in cpt.items():
+ assert isinstance(vs, tuple) and len(vs) == len(parents)
+ assert all(isinstance(v, bool) for v in vs)
+ assert 0 <= p <= 1
+
+ self.variable = X
+ self.parents = parents
+ self.cpt = cpt
+ self.children = []
+
+ def p(self, value, event):
+ """
+ Return the conditional probability
+ P(X=value | parents=parent_values), where parent_values
+ are the values of parents in event. (event must assign each
+ parent a value.)
+ >>> bn = BayesNode('X', 'Burglary', {T: 0.2, F: 0.625})
+ >>> bn.p(False, {'Burglary': False, 'Earthquake': True})
+ 0.375
+ """
+ assert isinstance(value, bool)
+ ptrue = self.cpt[event_values(event, self.parents)]
+ return ptrue if value else 1 - ptrue
+
+ def sample(self, event):
+ """
+ Sample from the distribution for this variable conditioned
+ on event's values for parent_variables. That is, return True/False
+ at random according with the conditional probability given the
+ parents.
+ """
+ return probability(self.p(True, event))
+
+ def __repr__(self):
+ return repr((self.variable, ' '.join(self.parents)))
+
+
+# Burglary example [Figure 13.2]
+
+
+T, F = True, False
+
+burglary = BayesNet([
+ ('Burglary', '', 0.001),
+ ('Earthquake', '', 0.002),
+ ('Alarm', 'Burglary Earthquake',
+ {(T, T): 0.95, (T, F): 0.94, (F, T): 0.29, (F, F): 0.001}),
+ ('JohnCalls', 'Alarm', {T: 0.90, F: 0.05}),
+ ('MaryCalls', 'Alarm', {T: 0.70, F: 0.01})
+])
+
+
+# ______________________________________________________________________________
+# Section 13.2. The Semantics of Bayesian Networks
+# Bayesian nets with continuous variables
+
+
+def gaussian_probability(param, event, value):
+ """
+ Gaussian probability of a continuous Bayesian network node on condition of
+ certain event and the parameters determined by the event
+ :param param: parameters determined by discrete parent events of current node
+ :param event: a dict, continuous event of current node, the values are used
+ as parameters in calculating distribution
+ :param value: float, the value of current continuous node
+ :return: float, the calculated probability
+ >>> param = {'sigma':0.5, 'b':1, 'a':{'h1':0.5, 'h2': 1.5}}
+ >>> event = {'h1':0.6, 'h2': 0.3}
+ >>> gaussian_probability(param, event, 1)
+ 0.2590351913317835
+ """
+
+ assert isinstance(event, dict)
+ assert isinstance(param, dict)
+ buff = 0
+ for k, v in event.items():
+        # buffer variable to calculate h1*a_h1 + h2*a_h2
+ buff += param['a'][k] * v
+ res = 1 / (param['sigma'] * np.sqrt(2 * np.pi)) * np.exp(-0.5 * ((value - buff - param['b']) / param['sigma']) ** 2)
+ return res
+
+
+def logistic_probability(param, event, value):
+ """
+    Logistic probability of a discrete node in a Bayesian network with continuous parents.
+ :param param: a dict, parameters determined by discrete parents of current node
+ :param event: a dict, names and values of continuous parent variables of current node
+ :param value: boolean, True or False
+    :return: float, the calculated probability
+ """
+
+ buff = 1
+ for _, v in event.items():
+        # buffer variable accumulating (v - mu) / sigma over the continuous parent values
+
+ buff *= (v - param['mu']) / param['sigma']
+ p = 1 - 1 / (1 + np.exp(-4 / np.sqrt(2 * np.pi) * buff))
+ return p if value else 1 - p
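+
+# Example (parameter and event values assumed purely for illustration): with
+# param = {'mu': 0.5, 'sigma': 0.5} and event = {'Cost': 1.0},
+# logistic_probability(param, event, True) evaluates to roughly 0.17.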
+
+
+class ContinuousBayesNode:
+ """ A Bayesian network node with continuous distribution or with continuous distributed parents """
+
+ def __init__(self, name, d_parents, c_parents, parameters, type):
+ """
+ A continuous Bayesian node has two types of parents: discrete and continuous.
+ :param d_parents: str, name of discrete parents, value of which determines distribution parameters
+ :param c_parents: str, name of continuous parents, value of which is used to calculate distribution
+        :param parameters: a dict, parameters for the distribution of the current node; keys correspond to discrete parent values
+        :param type: str, type of the current node's value, either 'd' (discrete) or 'c' (continuous)
+ """
+
+ self.parameters = parameters
+ self.type = type
+ self.d_parents = d_parents.split()
+ self.c_parents = c_parents.split()
+ self.parents = self.d_parents + self.c_parents
+ self.variable = name
+ self.children = []
+
+ def continuous_p(self, value, c_event, d_event):
+ """
+ Probability given the value of current node and its parents
+ :param c_event: event of continuous nodes
+ :param d_event: event of discrete nodes
+ """
+ assert isinstance(c_event, dict)
+ assert isinstance(d_event, dict)
+
+ d_event_vals = event_values(d_event, self.d_parents)
+ if len(d_event_vals) == 1:
+ d_event_vals = d_event_vals[0]
+ param = self.parameters[d_event_vals]
+ if self.type == "c":
+ p = gaussian_probability(param, c_event, value)
+ if self.type == "d":
+ p = logistic_probability(param, c_event, value)
+ return p
+
+
+# Harvest-buy example [Figure 13.5]
+
+
+harvest_buy = BayesNet([
+ ('Subsidy', '', 0.001),
+ ('Harvest', '', 0.002),
+ ('Cost', 'Subsidy', 'Harvest',
+ {True: {'sigma': 0.5, 'b': 1, 'a': {'Harvest': 0.5}},
+ False: {'sigma': 0.6, 'b': 1, 'a': {'Harvest': 0.5}}}, 'c'),
+ ('Buys', '', 'Cost', {T: {'mu': 0.5, 'sigma': 0.5}, F: {'mu': 0.6, 'sigma': 0.6}}, 'd')])
+
+
+# ______________________________________________________________________________
+# 13.3 Exact Inference in Bayesian Networks
+# 13.3.1 Inference by enumeration
+
+
+def enumeration_ask(X, e, bn):
+ """
+ Return the conditional probability distribution of variable X
+ given evidence e, from BayesNet bn. [Figure 13.10]
+ >>> enumeration_ask('Burglary', dict(JohnCalls=T, MaryCalls=T), burglary
+ ... ).show_approx()
+ 'False: 0.716, True: 0.284'
+ """
+
+ assert X not in e, "Query variable must be distinct from evidence"
+ Q = ProbDist(X)
+ for xi in bn.variable_values(X):
+ Q[xi] = enumerate_all(bn.variables, extend(e, X, xi), bn)
+ return Q.normalize()
+
+
+def enumerate_all(variables, e, bn):
+ """
+ Return the sum of those entries in P(variables | e{others})
+ consistent with e, where P is the joint distribution represented
+ by bn, and e{others} means e restricted to bn's other variables
+ (the ones other than variables). Parents must precede children in variables.
+ """
+
+ if not variables:
+ return 1.0
+ Y, rest = variables[0], variables[1:]
+ Ynode = bn.variable_node(Y)
+ if Y in e:
+ return Ynode.p(e[Y], e) * enumerate_all(rest, e, bn)
+ else:
+ return sum(Ynode.p(y, e) * enumerate_all(rest, extend(e, Y, y), bn)
+ for y in bn.variable_values(Y))
+
+
+# ______________________________________________________________________________
+# 13.3.2 The variable elimination algorithm
+
+
+def elimination_ask(X, e, bn):
+ """
+ Compute bn's P(X|e) by variable elimination. [Figure 13.12]
+ >>> elimination_ask('Burglary', dict(JohnCalls=T, MaryCalls=T), burglary
+ ... ).show_approx()
+ 'False: 0.716, True: 0.284'
+ """
+ assert X not in e, "Query variable must be distinct from evidence"
+ factors = []
+ for var in reversed(bn.variables):
+ factors.append(make_factor(var, e, bn))
+ if is_hidden(var, X, e):
+ factors = sum_out(var, factors, bn)
+ return pointwise_product(factors, bn).normalize()
+
+
+def is_hidden(var, X, e):
+ """Is var a hidden variable when querying P(X|e)?"""
+ return var != X and var not in e
+
+
+def make_factor(var, e, bn):
+ """
+ Return the factor for var in bn's joint distribution given e.
+ That is, bn's full joint distribution, projected to accord with e,
+ is the pointwise product of these factors for bn's variables.
+ """
+ node = bn.variable_node(var)
+ variables = [X for X in [var] + node.parents if X not in e]
+ cpt = {event_values(e1, variables): node.p(e1[var], e1)
+ for e1 in all_events(variables, bn, e)}
+ return Factor(variables, cpt)
+
+
+def pointwise_product(factors, bn):
+ return reduce(lambda f, g: f.pointwise_product(g, bn), factors)
+
+
+def sum_out(var, factors, bn):
+ """Eliminate var from all factors by summing over its values."""
+ result, var_factors = [], []
+ for f in factors:
+ (var_factors if var in f.variables else result).append(f)
+ result.append(pointwise_product(var_factors, bn).sum_out(var, bn))
+ return result
+
+
+class Factor:
+ """A factor in a joint distribution."""
+
+ def __init__(self, variables, cpt):
+ self.variables = variables
+ self.cpt = cpt
+
+ def pointwise_product(self, other, bn):
+ """Multiply two factors, combining their variables."""
+ variables = list(set(self.variables) | set(other.variables))
+ cpt = {event_values(e, variables): self.p(e) * other.p(e)
+ for e in all_events(variables, bn, {})}
+ return Factor(variables, cpt)
+
+ def sum_out(self, var, bn):
+ """Make a factor eliminating var by summing over its values."""
+ variables = [X for X in self.variables if X != var]
+ cpt = {event_values(e, variables): sum(self.p(extend(e, var, val))
+ for val in bn.variable_values(var))
+ for e in all_events(variables, bn, {})}
+ return Factor(variables, cpt)
+
+ def normalize(self):
+ """Return my probabilities; must be down to one variable."""
+ assert len(self.variables) == 1
+ return ProbDist(self.variables[0],
+ {k: v for ((k,), v) in self.cpt.items()})
+
+ def p(self, e):
+ """Look up my value tabulated for e."""
+ return self.cpt[event_values(e, self.variables)]
+
+
+def all_events(variables, bn, e):
+ """Yield every way of extending e with values for all variables."""
+ if not variables:
+ yield e
+ else:
+ X, rest = variables[0], variables[1:]
+ for e1 in all_events(rest, bn, e):
+ for x in bn.variable_values(X):
+ yield extend(e1, X, x)
+
+
+# ______________________________________________________________________________
+# 13.3.4 Clustering algorithms
+# [Figure 13.14a]: sprinkler network
+
+
+sprinkler = BayesNet([
+ ('Cloudy', '', 0.5),
+ ('Sprinkler', 'Cloudy', {T: 0.10, F: 0.50}),
+ ('Rain', 'Cloudy', {T: 0.80, F: 0.20}),
+ ('WetGrass', 'Sprinkler Rain',
+ {(T, T): 0.99, (T, F): 0.90, (F, T): 0.90, (F, F): 0.00})])
+
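+# Example query against this network (a sketch; the exact numbers follow from the
+# CPTs above):
+# elimination_ask('Rain', dict(WetGrass=T), sprinkler).show_approx()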
+
+# ______________________________________________________________________________
+# 13.4 Approximate Inference for Bayesian Networks
+# 13.4.1 Direct sampling methods
+
+
+def prior_sample(bn):
+ """
+ Randomly sample from bn's full joint distribution. The result
+ is a {variable: value} dict. [Figure 13.15]
+ """
+ event = {}
+ for node in bn.nodes:
+ event[node.variable] = node.sample(event)
+ return event
+
+
+# _________________________________________________________________________
+
+
+def rejection_sampling(X, e, bn, N=10000):
+ """
+ [Figure 13.16]
+ Estimate the probability distribution of variable X given
+ evidence e in BayesNet bn, using N samples.
+ Raises a ZeroDivisionError if all the N samples are rejected,
+ i.e., inconsistent with e.
+ >>> random.seed(47)
+ >>> rejection_sampling('Burglary', dict(JohnCalls=T, MaryCalls=T),
+ ... burglary, 10000).show_approx()
+ 'False: 0.7, True: 0.3'
+ """
+ counts = {x: 0 for x in bn.variable_values(X)} # bold N in [Figure 13.16]
+ for j in range(N):
+ sample = prior_sample(bn) # boldface x in [Figure 13.16]
+ if consistent_with(sample, e):
+ counts[sample[X]] += 1
+ return ProbDist(X, counts)
+
+
+def consistent_with(event, evidence):
+ """Is event consistent with the given evidence?"""
+ return all(evidence.get(k, v) == v
+ for k, v in event.items())
+
+
+# _________________________________________________________________________
+
+
+def likelihood_weighting(X, e, bn, N=10000):
+ """
+ [Figure 13.17]
+ Estimate the probability distribution of variable X given
+ evidence e in BayesNet bn.
+ >>> random.seed(1017)
+ >>> likelihood_weighting('Burglary', dict(JohnCalls=T, MaryCalls=T),
+ ... burglary, 10000).show_approx()
+ 'False: 0.702, True: 0.298'
+ """
+
+ W = {x: 0 for x in bn.variable_values(X)}
+ for j in range(N):
+        sample, weight = weighted_sample(bn, e)  # boldface x, w in [Figure 13.17]
+ W[sample[X]] += weight
+ return ProbDist(X, W)
+
+
+def weighted_sample(bn, e):
+ """
+ Sample an event from bn that's consistent with the evidence e;
+ return the event and its weight, the likelihood that the event
+ accords to the evidence.
+ """
+
+ w = 1
+ event = dict(e) # boldface x in [Figure 13.17]
+ for node in bn.nodes:
+ Xi = node.variable
+ if Xi in e:
+ w *= node.p(e[Xi], event)
+ else:
+ event[Xi] = node.sample(event)
+ return event, w
+
+
+# _________________________________________________________________________
+# 13.4.2 Inference by Markov chain simulation
+
+
+def gibbs_ask(X, e, bn, N=1000):
+ """[Figure 13.19]"""
+ assert X not in e, "Query variable must be distinct from evidence"
+    counts = {x: 0 for x in bn.variable_values(X)}  # bold N in [Figure 13.19]
+    Z = [var for var in bn.variables if var not in e]
+    state = dict(e)  # boldface x in [Figure 13.19]
+ for Zi in Z:
+ state[Zi] = random.choice(bn.variable_values(Zi))
+ for j in range(N):
+ for Zi in Z:
+ state[Zi] = markov_blanket_sample(Zi, state, bn)
+ counts[state[X]] += 1
+ return ProbDist(X, counts)
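+
+# Example usage (a rough sketch; results vary with the random seed but should be
+# close to enumeration_ask's exact answer 'False: 0.716, True: 0.284'):
+# gibbs_ask('Burglary', dict(JohnCalls=T, MaryCalls=T), burglary).show_approx()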
+
+
+def markov_blanket_sample(X, e, bn):
+ """
+ Return a sample from P(X | mb) where mb denotes that the
+ variables in the Markov blanket of X take their values from event
+ e (which must assign a value to each). The Markov blanket of X is
+ X's parents, children, and children's parents.
+ """
+ Xnode = bn.variable_node(X)
+ Q = ProbDist(X)
+ for xi in bn.variable_values(X):
+ ei = extend(e, X, xi)
+ # [Equation 13.12:]
+ Q[xi] = Xnode.p(xi, e) * product(Yj.p(ei[Yj.variable], ei)
+ for Yj in Xnode.children)
+ # (assuming a Boolean variable here)
+ return probability(Q.normalize()[True])
+
+
+# _________________________________________________________________________
+# 13.4.3 Compiling approximate inference
+
+
+class complied_burglary:
+ """compiled version of burglary network"""
+
+ def Burglary(self, sample):
+ if sample['Alarm']:
+ if sample['Earthquake']:
+ return probability(0.00327)
+ else:
+ return probability(0.485)
+ else:
+ if sample['Earthquake']:
+ return probability(7.05e-05)
+ else:
+ return probability(6.01e-05)
+
+ def Earthquake(self, sample):
+ if sample['Alarm']:
+ if sample['Burglary']:
+ return probability(0.0020212)
+ else:
+ return probability(0.36755)
+ else:
+ if sample['Burglary']:
+ return probability(0.0016672)
+ else:
+ return probability(0.0014222)
+
+ def MaryCalls(self, sample):
+ if sample['Alarm']:
+ return probability(0.7)
+ else:
+ return probability(0.01)
+
+ def JongCalls(self, sample):
+ if sample['Alarm']:
+ return probability(0.9)
+ else:
+ return probability(0.05)
+
+ def Alarm(self, sample):
+ raise NotImplementedError
diff --git a/pytest.ini b/pytest.ini
new file mode 100644
index 000000000..1561b6fe6
--- /dev/null
+++ b/pytest.ini
@@ -0,0 +1,5 @@
+[pytest]
+filterwarnings =
+ ignore::DeprecationWarning
+ ignore::UserWarning
+ ignore::RuntimeWarning
diff --git a/reinforcement_learning.ipynb b/reinforcement_learning.ipynb
new file mode 100644
index 000000000..ee3b6a5eb
--- /dev/null
+++ b/reinforcement_learning.ipynb
@@ -0,0 +1,644 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Reinforcement Learning\n",
+ "\n",
+ "This Jupyter notebook acts as supporting material for **Chapter 21 Reinforcement Learning** of the book* Artificial Intelligence: A Modern Approach*. This notebook makes use of the implementations in `rl.py` module. We also make use of implementation of MDPs in the `mdp.py` module to test our agents. It might be helpful if you have already gone through the Jupyter notebook dealing with Markov decision process. Let us import everything from the `rl` module. It might be helpful to view the source of some of our implementations. Please refer to the Introductory Jupyter notebook for more details."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "from reinforcement_learning import *"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## CONTENTS\n",
+ "\n",
+ "* Overview\n",
+ "* Passive Reinforcement Learning\n",
+ " - Direct Utility Estimation\n",
+ " - Adaptive Dynamic Programming\n",
+ " - Temporal-Difference Agent\n",
+ "* Active Reinforcement Learning\n",
+ " - Q learning"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "collapsed": true
+ },
+ "source": [
+ "## OVERVIEW\n",
+ "\n",
+ "Before we start playing with the actual implementations let us review a couple of things about RL.\n",
+ "\n",
+ "1. Reinforcement Learning is concerned with how software agents ought to take actions in an environment so as to maximize some notion of cumulative reward. \n",
+ "\n",
+ "2. Reinforcement learning differs from standard supervised learning in that correct input/output pairs are never presented, nor sub-optimal actions explicitly corrected. Further, there is a focus on on-line performance, which involves finding a balance between exploration (of uncharted territory) and exploitation (of current knowledge).\n",
+ "\n",
+ "-- Source: [Wikipedia](https://en.wikipedia.org/wiki/Reinforcement_learning)\n",
+ "\n",
+ "In summary we have a sequence of state action transitions with rewards associated with some states. Our goal is to find the optimal policy $\\pi$ which tells us what action to take in each state."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## PASSIVE REINFORCEMENT LEARNING\n",
+ "\n",
+ "In passive Reinforcement Learning the agent follows a fixed policy $\\pi$. Passive learning attempts to evaluate the given policy $pi$ - without any knowledge of the Reward function $R(s)$ and the Transition model $P(s'\\ |\\ s, a)$.\n",
+ "\n",
+ "This is usually done by some method of **utility estimation**. The agent attempts to directly learn the utility of each state that would result from following the policy. Note that at each step, it has to *perceive* the reward and the state - it has no global knowledge of these. Thus, if a certain the entire set of actions offers a very low probability of attaining some state $s_+$ - the agent may never perceive the reward $R(s_+)$.\n",
+ "\n",
+ "Consider a situation where an agent is given a policy to follow. Thus, at any point it knows only its current state and current reward, and the action it must take next. This action may lead it to more than one state, with different probabilities.\n",
+ "\n",
+ "For a series of actions given by $\\pi$, the estimated utility $U$:\n",
+ "$$U^{\\pi}(s) = E(\\sum_{t=0}^\\inf \\gamma^t R^t(s')$$)\n",
+ "Or the expected value of summed discounted rewards until termination.\n",
+ "\n",
+ "Based on this concept, we discuss three methods of estimating utility:\n",
+ "\n",
+ "1. **Direct Utility Estimation (DUE)**\n",
+ " \n",
+ " The first, most naive method of estimating utility comes from the simplest interpretation of the above definition. We construct an agent that follows the policy until it reaches the terminal state. At each step, it logs its current state, reward. Once it reaches the terminal state, it can estimate the utility for each state for *that* iteration, by simply summing the discounted rewards from that state to the terminal one.\n",
+ "\n",
+ " It can now run this 'simulation' $n$ times, and calculate the average utility of each state. If a state occurs more than once in a simulation, both its utility values are counted separately.\n",
+ " \n",
+ " Note that this method may be prohibitively slow for very large statespaces. Besides, **it pays no attention to the transition probability $P(s'\\ |\\ s, a)$.** It misses out on information that it is capable of collecting (say, by recording the number of times an action from one state led to another state). The next method addresses this issue.\n",
+ " \n",
+ "2. **Adaptive Dynamic Programming (ADP)**\n",
+ " \n",
+ " This method makes use of knowledge of the past state $s$, the action $a$, and the new perceived state $s'$ to estimate the transition probability $P(s'\\ |\\ s,a)$. It does this by the simple counting of new states resulting from previous states and actions. \n",
+ " The program runs through the policy a number of times, keeping track of:\n",
+ " - each occurrence of state $s$ and the policy-recommended action $a$ in $N_{sa}$\n",
+ " - each occurrence of $s'$ resulting from $a$ on $s$ in $N_{s'|sa}$.\n",
+ " \n",
+ " It can thus estimate $P(s'\\ |\\ s,a)$ as $N_{s'|sa}/N_{sa}$, which in the limit of infinite trials, will converge to the true value. \n",
+ " Using the transition probabilities thus estimated, it can apply `POLICY-EVALUATION` to estimate the utilities $U(s)$ using properties of convergence of the Bellman functions.\n",
+ "\n",
+ "3. **Temporal-difference learning (TD)**\n",
+ " \n",
+ " Instead of explicitly building the transition model $P$, the temporal-difference model makes use of the expected closeness between the utilities of two consecutive states $s$ and $s'$.\n",
+ " For the transition $s$ to $s'$, the update is written as:\n",
+ "$$U^{\\pi}(s) \\leftarrow U^{\\pi}(s) + \\alpha \\left( R(s) + \\gamma U^{\\pi}(s') - U^{\\pi}(s) \\right)$$\n",
+ " This model implicitly incorporates the transition probabilities by being weighed for each state by the number of times it is achieved from the current state. Thus, over a number of iterations, it converges similarly to the Bellman equations.\n",
+ " The advantage of the TD learning model is its relatively simple computation at each step, rather than having to keep track of various counts.\n",
+ " For $n_s$ states and $n_a$ actions the ADP model would have $n_s \\times n_a$ numbers $N_{sa}$ and $n_s^2 \\times n_a$ numbers $N_{s'|sa}$ to keep track of. The TD model must only keep track of a utility $U(s)$ for each state."
+ ]
+ },
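+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "As a tiny worked example of direct utility estimation (the rewards and $\\gamma$ below are assumed purely for illustration), suppose one trial visits three states with rewards $-0.04$, $-0.04$ and $+1$, with $\\gamma = 0.9$. The utility estimate of the first visited state for that trial is the discounted sum of the rewards from it onwards:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Discounted return of the first visited state for one assumed trial\n",
+    "rewards = [-0.04, -0.04, 1]\n",
+    "gamma = 0.9\n",
+    "sum(gamma ** t * r for t, r in enumerate(rewards))"
+   ]
+  },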
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Demonstrating Passive agents\n",
+ "\n",
+ "Passive agents are implemented in `rl.py` as various `Agent-Class`es.\n",
+ "\n",
+ "To demonstrate these agents, we make use of the `GridMDP` object from the `MDP` module. `sequential_decision_environment` is similar to that used for the `MDP` notebook but has discounting with $\\gamma = 0.9$.\n",
+ "\n",
+ "The `Agent-Program` can be obtained by creating an instance of the relevant `Agent-Class`. The `__call__` method allows the `Agent-Class` to be called as a function. The class needs to be instantiated with a policy ($\\pi$) and an `MDP` whose utility of states will be estimated."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "from mdp import sequential_decision_environment"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The `sequential_decision_environment` is a GridMDP object as shown below. The rewards are **+1** and **-1** in the terminal states, and **-0.04** in the rest. Now we define actions and a policy similar to **Fig 21.1** in the book."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "# Action Directions\n",
+ "north = (0, 1)\n",
+ "south = (0,-1)\n",
+ "west = (-1, 0)\n",
+ "east = (1, 0)\n",
+ "\n",
+ "policy = {\n",
+ " (0, 2): east, (1, 2): east, (2, 2): east, (3, 2): None,\n",
+ " (0, 1): north, (2, 1): north, (3, 1): None,\n",
+ " (0, 0): north, (1, 0): west, (2, 0): west, (3, 0): west, \n",
+ "}\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Direction Utility Estimation Agent\n",
+ "\n",
+ "The `PassiveDEUAgent` class in the `rl` module implements the Agent Program described in **Fig 21.2** of the AIMA Book. `PassiveDEUAgent` sums over rewards to find the estimated utility for each state. It thus requires the running of a number of iterations."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "%psource PassiveDUEAgent"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "DUEagent = PassiveDUEAgent(policy, sequential_decision_environment)\n",
+ "for i in range(200):\n",
+ " run_single_trial(DUEagent, sequential_decision_environment)\n",
+ " DUEagent.estimate_U()\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The calculated utilities are:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print('\\n'.join([str(k)+':'+str(v) for k, v in DUEagent.U.items()]))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Adaptive Dynamic Programming Agent\n",
+ "\n",
+ "The `PassiveADPAgent` class in the `rl` module implements the Agent Program described in **Fig 21.2** of the AIMA Book. `PassiveADPAgent` uses state transition and occurrence counts to estimate $P$, and then $U$. Go through the source below to understand the agent."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "%psource PassiveADPAgent"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We instantiate a `PassiveADPAgent` below with the `GridMDP` shown and train it over 200 iterations. The `rl` module has a simple implementation to simulate iterations. The function is called **run_single_trial**."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "ADPagent = PassiveADPAgent(policy, sequential_decision_environment)\n",
+ "for i in range(200):\n",
+ " run_single_trial(ADPagent, sequential_decision_environment)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The calculated utilities are:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "print('\\n'.join([str(k)+':'+str(v) for k, v in ADPagent.U.items()]))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Passive Temporal Difference Agent\n",
+ "\n",
+ "`PassiveTDAgent` uses temporal differences to learn utility estimates. We learn the difference between the states and backup the values to previous states. Let us look into the source before we see some usage examples."
+ ]
+ },
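+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Before inspecting the full agent, here is a minimal standalone sketch of a single TD backup $U^{\\pi}(s) \\leftarrow U^{\\pi}(s) + \\alpha \\left( R(s) + \\gamma U^{\\pi}(s') - U^{\\pi}(s) \\right)$. It is illustrative only (the states, reward and learning rate are assumed) and is not the `PassiveTDAgent` implementation itself."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Minimal sketch of one temporal-difference backup (illustrative only)\n",
+    "def td_update(U, s, s1, r, alpha, gamma=0.9):\n",
+    "    u_s, u_s1 = U.get(s, 0.0), U.get(s1, 0.0)\n",
+    "    U[s] = u_s + alpha * (r + gamma * u_s1 - u_s)\n",
+    "    return U\n",
+    "\n",
+    "# One backup for an assumed transition (1, 0) -> (1, 1) with reward -0.04 and alpha = 0.5\n",
+    "td_update({}, (1, 0), (1, 1), -0.04, alpha=0.5)"
+   ]
+  },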
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "%psource PassiveTDAgent"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "In creating the `TDAgent`, we use the **same learning rate** $\\alpha$ as given in the footnote of the book on **page 837**."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "TDagent = PassiveTDAgent(policy, sequential_decision_environment, alpha = lambda n: 60./(59+n))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now we run **200 trials** for the agent to estimate Utilities."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "for i in range(200):\n",
+ " run_single_trial(TDagent,sequential_decision_environment)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The calculated utilities are:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print('\\n'.join([str(k)+':'+str(v) for k, v in TDagent.U.items()]))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Comparison with value iteration method\n",
+ "\n",
+ "We can also compare the utility estimates learned by our agent to those obtained via **value iteration**.\n",
+ "\n",
+ "**Note that value iteration has a priori knowledge of the transition table $P$, the rewards $R$, and all the states $s$.**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "from mdp import value_iteration"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The values calculated by value iteration:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "U_values = value_iteration(sequential_decision_environment)\n",
+ "print('\\n'.join([str(k)+':'+str(v) for k, v in U_values.items()]))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Evolution of utility estimates over iterations\n",
+ "\n",
+ "We can explore how these estimates vary with time by using plots similar to **Fig 21.5a**. We will first enable matplotlib using the inline backend. We also define a function to collect the values of utilities at each iteration."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "%matplotlib inline\n",
+ "import matplotlib.pyplot as plt\n",
+ "\n",
+ "def graph_utility_estimates(agent_program, mdp, no_of_iterations, states_to_graph):\n",
+ " graphs = {state:[] for state in states_to_graph}\n",
+ " for iteration in range(1,no_of_iterations+1):\n",
+ " run_single_trial(agent_program, mdp)\n",
+ " for state in states_to_graph:\n",
+ " graphs[state].append((iteration, agent_program.U[state]))\n",
+ " for state, value in graphs.items():\n",
+ " state_x, state_y = zip(*value)\n",
+ " plt.plot(state_x, state_y, label=str(state))\n",
+ " plt.ylim([0,1.2])\n",
+ " plt.legend(loc='lower right')\n",
+ " plt.xlabel('Iterations')\n",
+ " plt.ylabel('U')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Here is a plot of state $(2,2)$."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "agent = PassiveTDAgent(policy, sequential_decision_environment, alpha=lambda n: 60./(59+n))\n",
+ "graph_utility_estimates(agent, sequential_decision_environment, 500, [(2,2)])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "It is also possible to plot multiple states on the same plot. As expected, the utility of the finite state $(3,2)$ stays constant and is equal to $R((3,2)) = 1$."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "graph_utility_estimates(agent, sequential_decision_environment, 500, [(2,2), (3,2)])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "collapsed": true
+ },
+ "source": [
+ "## ACTIVE REINFORCEMENT LEARNING\n",
+ "\n",
+ "Unlike Passive Reinforcement Learning in Active Reinforcement Learning we are not bound by a policy pi and we need to select our actions. In other words the agent needs to learn an optimal policy. The fundamental tradeoff the agent needs to face is that of exploration vs. exploitation. "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### QLearning Agent\n",
+ "\n",
+ "The QLearningAgent class in the rl module implements the Agent Program described in **Fig 21.8** of the AIMA Book. In Q-Learning the agent learns an action-value function Q which gives the utility of taking a given action in a particular state. Q-Learning does not required a transition model and hence is a model free method. Let us look into the source before we see some usage examples."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "%psource QLearningAgent"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The Agent Program can be obtained by creating the instance of the class by passing the appropriate parameters. Because of the __ call __ method the object that is created behaves like a callable and returns an appropriate action as most Agent Programs do. To instantiate the object we need a mdp similar to the PassiveTDAgent.\n",
+ "\n",
+ " Let us use the same GridMDP object we used above. **Figure 17.1 (sequential_decision_environment)** is similar to **Figure 21.1** but has some discounting as **gamma = 0.9**. The class also implements an exploration function **f** which returns fixed **Rplus** until agent has visited state, action **Ne** number of times. This is the same as the one defined on page **842** of the book. The method **actions_in_state** returns actions possible in given state. It is useful when applying max and argmax operations."
+ ]
+ },
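+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "As a quick illustration, the exploration function described above can be sketched on its own (this mirrors `QLearningAgent.f`; the `Ne`, `Rplus` and sample values below are just for demonstration):"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "Ne, Rplus = 5, 2  # same values we pass to the agent below\n",
+    "\n",
+    "def f(u, n):\n",
+    "    # optimistic value Rplus until the (state, action) pair has been tried Ne times\n",
+    "    return Rplus if n < Ne else u\n",
+    "\n",
+    "f(0.3, 2), f(0.3, 7)"
+   ]
+  },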
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let us create our object now. We also use the **same alpha** as given in the footnote of the book on **page 837**. We use **Rplus = 2** and **Ne = 5** as defined on page 843. **Fig 21.7** "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "q_agent = QLearningAgent(sequential_decision_environment, Ne=5, Rplus=2, \n",
+ " alpha=lambda n: 60./(59+n))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now to try out the q_agent we make use of the **run_single_trial** function in rl.py (which was also used above). Let us use **200** iterations."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "for i in range(200):\n",
+ " run_single_trial(q_agent,sequential_decision_environment)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now let us see the Q Values. The keys are state-action pairs. Where different actions correspond according to:\n",
+ "\n",
+ "north = (0, 1)\n",
+ "south = (0,-1)\n",
+ "west = (-1, 0)\n",
+ "east = (1, 0)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "q_agent.Q"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The Utility **U** of each state is related to **Q** by the following equation.\n",
+ "\n",
+ "**U (s) = max a Q(s, a)**\n",
+ "\n",
+ "Let us convert the Q Values above into U estimates.\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "U = defaultdict(lambda: -1000.) # Very Large Negative Value for Comparison see below.\n",
+ "for state_action, value in q_agent.Q.items():\n",
+ " state, action = state_action\n",
+ " if U[state] < value:\n",
+ " U[state] = value"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "U"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let us finally compare these estimates to value_iteration results."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(value_iteration(sequential_decision_environment))"
+ ]
+  }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.6.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}
\ No newline at end of file
diff --git a/reinforcement_learning.py b/reinforcement_learning.py
new file mode 100644
index 000000000..4cb91af0f
--- /dev/null
+++ b/reinforcement_learning.py
@@ -0,0 +1,337 @@
+"""Reinforcement Learning (Chapter 21)"""
+
+import random
+from collections import defaultdict
+
+from mdp import MDP, policy_evaluation
+
+
+class PassiveDUEAgent:
+ """
+ Passive (non-learning) agent that uses direct utility estimation
+ on a given MDP and policy.
+
+ import sys
+ from mdp import sequential_decision_environment
+ north = (0, 1)
+ south = (0,-1)
+ west = (-1, 0)
+ east = (1, 0)
+ policy = {(0, 2): east, (1, 2): east, (2, 2): east, (3, 2): None, (0, 1): north, (2, 1): north,
+ (3, 1): None, (0, 0): north, (1, 0): west, (2, 0): west, (3, 0): west,}
+ agent = PassiveDUEAgent(policy, sequential_decision_environment)
+ for i in range(200):
+ run_single_trial(agent,sequential_decision_environment)
+ agent.estimate_U()
+ agent.U[(0, 0)] > 0.2
+ True
+ """
+
+ def __init__(self, pi, mdp):
+ self.pi = pi
+ self.mdp = mdp
+ self.U = {}
+ self.s = None
+ self.a = None
+ self.s_history = []
+ self.r_history = []
+ self.init = mdp.init
+
+ def __call__(self, percept):
+ s1, r1 = percept
+ self.s_history.append(s1)
+ self.r_history.append(r1)
+ ##
+ ##
+ if s1 in self.mdp.terminals:
+ self.s = self.a = None
+ else:
+ self.s, self.a = s1, self.pi[s1]
+ return self.a
+
+ def estimate_U(self):
+ # this function can be called only if the MDP has reached a terminal state
+ # it will also reset the mdp history
+ assert self.a is None, 'MDP is not in terminal state'
+ assert len(self.s_history) == len(self.r_history)
+ # calculating the utilities based on the current iteration
+ U2 = {s: [] for s in set(self.s_history)}
+ for i in range(len(self.s_history)):
+ s = self.s_history[i]
+ U2[s] += [sum(self.r_history[i:])]
+ U2 = {k: sum(v) / max(len(v), 1) for k, v in U2.items()}
+ # resetting history
+ self.s_history, self.r_history = [], []
+ # setting the new utilities to the average of the previous
+ # iteration and this one
+ for k in U2.keys():
+ if k in self.U.keys():
+ self.U[k] = (self.U[k] + U2[k]) / 2
+ else:
+ self.U[k] = U2[k]
+ return self.U
+
+ def update_state(self, percept):
+ """To be overridden in most cases. The default case
+ assumes the percept to be of type (state, reward)"""
+ return percept
+
+
+class PassiveADPAgent:
+ """
+ [Figure 21.2]
+ Passive (non-learning) agent that uses adaptive dynamic programming
+ on a given MDP and policy.
+
+ import sys
+ from mdp import sequential_decision_environment
+ north = (0, 1)
+ south = (0,-1)
+ west = (-1, 0)
+ east = (1, 0)
+ policy = {(0, 2): east, (1, 2): east, (2, 2): east, (3, 2): None, (0, 1): north, (2, 1): north,
+ (3, 1): None, (0, 0): north, (1, 0): west, (2, 0): west, (3, 0): west,}
+ agent = PassiveADPAgent(policy, sequential_decision_environment)
+ for i in range(100):
+ run_single_trial(agent,sequential_decision_environment)
+
+ agent.U[(0, 0)] > 0.2
+ True
+ agent.U[(0, 1)] > 0.2
+ True
+ """
+
+ class ModelMDP(MDP):
+ """Class for implementing modified Version of input MDP with
+ an editable transition model P and a custom function T."""
+
+ def __init__(self, init, actlist, terminals, gamma, states):
+ super().__init__(init, actlist, terminals, states=states, gamma=gamma)
+ nested_dict = lambda: defaultdict(nested_dict)
+ # StackOverflow:whats-the-best-way-to-initialize-a-dict-of-dicts-in-python
+ self.P = nested_dict()
+
+ def T(self, s, a):
+ """Return a list of tuples with probabilities for states
+ based on the learnt model P."""
+ return [(prob, res) for (res, prob) in self.P[(s, a)].items()]
+
+ def __init__(self, pi, mdp):
+ self.pi = pi
+ self.mdp = PassiveADPAgent.ModelMDP(mdp.init, mdp.actlist,
+ mdp.terminals, mdp.gamma, mdp.states)
+ self.U = {}
+ self.Nsa = defaultdict(int)
+ self.Ns1_sa = defaultdict(int)
+ self.s = None
+ self.a = None
+ self.visited = set() # keeping track of visited states
+
+ def __call__(self, percept):
+ s1, r1 = percept
+ mdp = self.mdp
+ R, P, terminals, pi = mdp.reward, mdp.P, mdp.terminals, self.pi
+ s, a, Nsa, Ns1_sa, U = self.s, self.a, self.Nsa, self.Ns1_sa, self.U
+
+ if s1 not in self.visited: # Reward is only known for visited state.
+ U[s1] = R[s1] = r1
+ self.visited.add(s1)
+ if s is not None:
+ Nsa[(s, a)] += 1
+ Ns1_sa[(s1, s, a)] += 1
+ # for each t such that Ns′|sa [t, s, a] is nonzero
+ for t in [res for (res, state, act), freq in Ns1_sa.items()
+ if (state, act) == (s, a) and freq != 0]:
+ P[(s, a)][t] = Ns1_sa[(t, s, a)] / Nsa[(s, a)]
+
+ self.U = policy_evaluation(pi, U, mdp)
+ ##
+ ##
+ self.Nsa, self.Ns1_sa = Nsa, Ns1_sa
+ if s1 in terminals:
+ self.s = self.a = None
+ else:
+ self.s, self.a = s1, self.pi[s1]
+ return self.a
+
+ def update_state(self, percept):
+ """To be overridden in most cases. The default case
+ assumes the percept to be of type (state, reward)."""
+ return percept
+
+
+class PassiveTDAgent:
+ """
+ [Figure 21.4]
+ The abstract class for a Passive (non-learning) agent that uses
+ temporal differences to learn utility estimates. Override update_state
+ method to convert percept to state and reward. The mdp being provided
+ should be an instance of a subclass of the MDP Class.
+
+ import sys
+ from mdp import sequential_decision_environment
+ north = (0, 1)
+ south = (0,-1)
+ west = (-1, 0)
+ east = (1, 0)
+ policy = {(0, 2): east, (1, 2): east, (2, 2): east, (3, 2): None, (0, 1): north, (2, 1): north,
+ (3, 1): None, (0, 0): north, (1, 0): west, (2, 0): west, (3, 0): west,}
+ agent = PassiveTDAgent(policy, sequential_decision_environment, alpha=lambda n: 60./(59+n))
+ for i in range(200):
+ run_single_trial(agent,sequential_decision_environment)
+
+ agent.U[(0, 0)] > 0.2
+ True
+ agent.U[(0, 1)] > 0.2
+ True
+ """
+
+ def __init__(self, pi, mdp, alpha=None):
+
+ self.pi = pi
+ self.U = {s: 0. for s in mdp.states}
+ self.Ns = {s: 0 for s in mdp.states}
+ self.s = None
+ self.a = None
+ self.r = None
+ self.gamma = mdp.gamma
+ self.terminals = mdp.terminals
+
+ if alpha:
+ self.alpha = alpha
+ else:
+ self.alpha = lambda n: 1 / (1 + n) # udacity video
+
+ def __call__(self, percept):
+ s1, r1 = self.update_state(percept)
+ pi, U, Ns, s, r = self.pi, self.U, self.Ns, self.s, self.r
+ alpha, gamma, terminals = self.alpha, self.gamma, self.terminals
+ if not Ns[s1]:
+ U[s1] = r1
+ if s is not None:
+ Ns[s] += 1
+ U[s] += alpha(Ns[s]) * (r + gamma * U[s1] - U[s])
+ if s1 in terminals:
+ self.s = self.a = self.r = None
+ else:
+ self.s, self.a, self.r = s1, pi[s1], r1
+ return self.a
+
+ def update_state(self, percept):
+ """To be overridden in most cases. The default case
+ assumes the percept to be of type (state, reward)."""
+ return percept
+
+
+class QLearningAgent:
+ """
+ [Figure 21.8]
+ An exploratory Q-learning agent. It avoids having to learn the transition
+ model because the Q-value of a state can be related directly to those of
+ its neighbors.
+
+ import sys
+ from mdp import sequential_decision_environment
+ north = (0, 1)
+ south = (0,-1)
+ west = (-1, 0)
+ east = (1, 0)
+ policy = {(0, 2): east, (1, 2): east, (2, 2): east, (3, 2): None, (0, 1): north, (2, 1): north,
+ (3, 1): None, (0, 0): north, (1, 0): west, (2, 0): west, (3, 0): west,}
+ q_agent = QLearningAgent(sequential_decision_environment, Ne=5, Rplus=2, alpha=lambda n: 60./(59+n))
+ for i in range(200):
+ run_single_trial(q_agent,sequential_decision_environment)
+
+ q_agent.Q[((0, 1), (0, 1))] >= -0.5
+ True
+ q_agent.Q[((1, 0), (0, -1))] <= 0.5
+ True
+ """
+
+ def __init__(self, mdp, Ne, Rplus, alpha=None):
+
+ self.gamma = mdp.gamma
+ self.terminals = mdp.terminals
+ self.all_act = mdp.actlist
+ self.Ne = Ne # iteration limit in exploration function
+ self.Rplus = Rplus # large value to assign before iteration limit
+ self.Q = defaultdict(float)
+ self.Nsa = defaultdict(float)
+ self.s = None
+ self.a = None
+ self.r = None
+
+ if alpha:
+ self.alpha = alpha
+ else:
+ self.alpha = lambda n: 1. / (1 + n) # udacity video
+
+ def f(self, u, n):
+ """Exploration function. Returns fixed Rplus until
+ agent has visited state, action a Ne number of times.
+ Same as ADP agent in book."""
+ if n < self.Ne:
+ return self.Rplus
+ else:
+ return u
+
+ def actions_in_state(self, state):
+ """Return actions possible in given state.
+ Useful for max and argmax."""
+ if state in self.terminals:
+ return [None]
+ else:
+ return self.all_act
+
+ def __call__(self, percept):
+ s1, r1 = self.update_state(percept)
+ Q, Nsa, s, a, r = self.Q, self.Nsa, self.s, self.a, self.r
+        alpha, gamma, terminals = self.alpha, self.gamma, self.terminals
+ actions_in_state = self.actions_in_state
+
+ if s in terminals:
+ Q[s, None] = r1
+ if s is not None:
+ Nsa[s, a] += 1
+ Q[s, a] += alpha(Nsa[s, a]) * (r + gamma * max(Q[s1, a1]
+ for a1 in actions_in_state(s1)) - Q[s, a])
+ if s in terminals:
+ self.s = self.a = self.r = None
+ else:
+ self.s, self.r = s1, r1
+ self.a = max(actions_in_state(s1), key=lambda a1: self.f(Q[s1, a1], Nsa[s1, a1]))
+ return self.a
+
+ def update_state(self, percept):
+ """To be overridden in most cases. The default case
+ assumes the percept to be of type (state, reward)."""
+ return percept
+
+
+def run_single_trial(agent_program, mdp):
+ """Execute trial for given agent_program
+ and mdp. mdp should be an instance of subclass
+ of mdp.MDP """
+
+ def take_single_action(mdp, s, a):
+ """
+ Select outcome of taking action a
+ in state s. Weighted Sampling.
+ """
+ x = random.uniform(0, 1)
+ cumulative_probability = 0.0
+ for probability_state in mdp.T(s, a):
+ probability, state = probability_state
+ cumulative_probability += probability
+ if x < cumulative_probability:
+ break
+ return state
+
+ current_state = mdp.init
+ while True:
+ current_reward = mdp.R(current_state)
+ percept = (current_state, current_reward)
+ next_action = agent_program(percept)
+ if next_action is None:
+ break
+ current_state = take_single_action(mdp, current_state, next_action)
diff --git a/reinforcement_learning4e.py b/reinforcement_learning4e.py
new file mode 100644
index 000000000..eaaba3e5a
--- /dev/null
+++ b/reinforcement_learning4e.py
@@ -0,0 +1,353 @@
+"""Reinforcement Learning (Chapter 21)"""
+
+import random
+from collections import defaultdict
+
+from mdp4e import MDP, policy_evaluation
+
+
+# _________________________________________
+# 21.2 Passive Reinforcement Learning
+# 21.2.1 Direct utility estimation
+
+
+class PassiveDUEAgent:
+ """
+ Passive (non-learning) agent that uses direct utility estimation
+ on a given MDP and policy.
+
+ import sys
+ from mdp import sequential_decision_environment
+ north = (0, 1)
+ south = (0,-1)
+ west = (-1, 0)
+ east = (1, 0)
+ policy = {(0, 2): east, (1, 2): east, (2, 2): east, (3, 2): None, (0, 1): north, (2, 1): north,
+ (3, 1): None, (0, 0): north, (1, 0): west, (2, 0): west, (3, 0): west,}
+ agent = PassiveDUEAgent(policy, sequential_decision_environment)
+ for i in range(200):
+ run_single_trial(agent,sequential_decision_environment)
+ agent.estimate_U()
+ agent.U[(0, 0)] > 0.2
+ True
+ """
+
+ def __init__(self, pi, mdp):
+ self.pi = pi
+ self.mdp = mdp
+ self.U = {}
+ self.s = None
+ self.a = None
+ self.s_history = []
+ self.r_history = []
+ self.init = mdp.init
+
+ def __call__(self, percept):
+ s1, r1 = percept
+ self.s_history.append(s1)
+ self.r_history.append(r1)
+ ##
+ ##
+ if s1 in self.mdp.terminals:
+ self.s = self.a = None
+ else:
+ self.s, self.a = s1, self.pi[s1]
+ return self.a
+
+ def estimate_U(self):
+ # this function can be called only if the MDP has reached a terminal state
+ # it will also reset the mdp history
+ assert self.a is None, 'MDP is not in terminal state'
+ assert len(self.s_history) == len(self.r_history)
+ # calculating the utilities based on the current iteration
+ U2 = {s: [] for s in set(self.s_history)}
+ for i in range(len(self.s_history)):
+ s = self.s_history[i]
+ U2[s] += [sum(self.r_history[i:])]
+ U2 = {k: sum(v) / max(len(v), 1) for k, v in U2.items()}
+ # resetting history
+ self.s_history, self.r_history = [], []
+ # setting the new utilities to the average of the previous
+ # iteration and this one
+ for k in U2.keys():
+ if k in self.U.keys():
+ self.U[k] = (self.U[k] + U2[k]) / 2
+ else:
+ self.U[k] = U2[k]
+ return self.U
+
+ def update_state(self, percept):
+ """To be overridden in most cases. The default case
+ assumes the percept to be of type (state, reward)"""
+ return percept
+
+
+# 21.2.2 Adaptive dynamic programming
+
+
+class PassiveADPAgent:
+ """
+ [Figure 21.2]
+ Passive (non-learning) agent that uses adaptive dynamic programming
+ on a given MDP and policy.
+
+ import sys
+ from mdp import sequential_decision_environment
+ north = (0, 1)
+ south = (0,-1)
+ west = (-1, 0)
+ east = (1, 0)
+ policy = {(0, 2): east, (1, 2): east, (2, 2): east, (3, 2): None, (0, 1): north, (2, 1): north,
+ (3, 1): None, (0, 0): north, (1, 0): west, (2, 0): west, (3, 0): west,}
+ agent = PassiveADPAgent(policy, sequential_decision_environment)
+ for i in range(100):
+ run_single_trial(agent,sequential_decision_environment)
+
+ agent.U[(0, 0)] > 0.2
+ True
+ agent.U[(0, 1)] > 0.2
+ True
+ """
+
+ class ModelMDP(MDP):
+ """Class for implementing modified Version of input MDP with
+ an editable transition model P and a custom function T."""
+
+ def __init__(self, init, actlist, terminals, gamma, states):
+ super().__init__(init, actlist, terminals, states=states, gamma=gamma)
+ nested_dict = lambda: defaultdict(nested_dict)
+ # StackOverflow:whats-the-best-way-to-initialize-a-dict-of-dicts-in-python
+ self.P = nested_dict()
+
+ def T(self, s, a):
+ """Return a list of tuples with probabilities for states
+ based on the learnt model P."""
+ return [(prob, res) for (res, prob) in self.P[(s, a)].items()]
+
+ def __init__(self, pi, mdp):
+ self.pi = pi
+ self.mdp = PassiveADPAgent.ModelMDP(mdp.init, mdp.actlist,
+ mdp.terminals, mdp.gamma, mdp.states)
+ self.U = {}
+ self.Nsa = defaultdict(int)
+ self.Ns1_sa = defaultdict(int)
+ self.s = None
+ self.a = None
+ self.visited = set() # keeping track of visited states
+
+ def __call__(self, percept):
+ s1, r1 = percept
+ mdp = self.mdp
+ R, P, terminals, pi = mdp.reward, mdp.P, mdp.terminals, self.pi
+ s, a, Nsa, Ns1_sa, U = self.s, self.a, self.Nsa, self.Ns1_sa, self.U
+
+ if s1 not in self.visited: # Reward is only known for visited state.
+ U[s1] = R[s1] = r1
+ self.visited.add(s1)
+ if s is not None:
+ Nsa[(s, a)] += 1
+ Ns1_sa[(s1, s, a)] += 1
+ # for each t such that Ns′|sa [t, s, a] is nonzero
+ for t in [res for (res, state, act), freq in Ns1_sa.items()
+ if (state, act) == (s, a) and freq != 0]:
+ P[(s, a)][t] = Ns1_sa[(t, s, a)] / Nsa[(s, a)]
+
+ self.U = policy_evaluation(pi, U, mdp)
+ ##
+ ##
+ self.Nsa, self.Ns1_sa = Nsa, Ns1_sa
+ if s1 in terminals:
+ self.s = self.a = None
+ else:
+ self.s, self.a = s1, self.pi[s1]
+ return self.a
+
+ def update_state(self, percept):
+ """To be overridden in most cases. The default case
+ assumes the percept to be of type (state, reward)."""
+ return percept
+
+
+# 21.2.3 Temporal-difference learning
+
+
+class PassiveTDAgent:
+ """
+ [Figure 21.4]
+ The abstract class for a Passive (non-learning) agent that uses
+ temporal differences to learn utility estimates. Override update_state
+ method to convert percept to state and reward. The mdp being provided
+ should be an instance of a subclass of the MDP Class.
+
+ import sys
+ from mdp import sequential_decision_environment
+ north = (0, 1)
+ south = (0,-1)
+ west = (-1, 0)
+ east = (1, 0)
+ policy = {(0, 2): east, (1, 2): east, (2, 2): east, (3, 2): None, (0, 1): north, (2, 1): north,
+ (3, 1): None, (0, 0): north, (1, 0): west, (2, 0): west, (3, 0): west,}
+ agent = PassiveTDAgent(policy, sequential_decision_environment, alpha=lambda n: 60./(59+n))
+ for i in range(200):
+ run_single_trial(agent,sequential_decision_environment)
+
+ agent.U[(0, 0)] > 0.2
+ True
+ agent.U[(0, 1)] > 0.2
+ True
+ """
+
+ def __init__(self, pi, mdp, alpha=None):
+
+ self.pi = pi
+ self.U = {s: 0. for s in mdp.states}
+ self.Ns = {s: 0 for s in mdp.states}
+ self.s = None
+ self.a = None
+ self.r = None
+ self.gamma = mdp.gamma
+ self.terminals = mdp.terminals
+
+ if alpha:
+ self.alpha = alpha
+ else:
+ self.alpha = lambda n: 1 / (1 + n) # udacity video
+
+ def __call__(self, percept):
+ s1, r1 = self.update_state(percept)
+ pi, U, Ns, s, r = self.pi, self.U, self.Ns, self.s, self.r
+ alpha, gamma, terminals = self.alpha, self.gamma, self.terminals
+ if not Ns[s1]:
+ U[s1] = r1
+ if s is not None:
+ Ns[s] += 1
+ U[s] += alpha(Ns[s]) * (r + gamma * U[s1] - U[s])
+ if s1 in terminals:
+ self.s = self.a = self.r = None
+ else:
+ self.s, self.a, self.r = s1, pi[s1], r1
+ return self.a
+
+ def update_state(self, percept):
+ """To be overridden in most cases. The default case
+ assumes the percept to be of type (state, reward)."""
+ return percept
+
+
+# __________________________________________
+# 21.3. Active Reinforcement Learning
+# 21.3.2 Learning an action-utility function
+
+
+class QLearningAgent:
+ """
+ [Figure 21.8]
+ An exploratory Q-learning agent. It avoids having to learn the transition
+ model because the Q-value of a state can be related directly to those of
+ its neighbors.
+
+ import sys
+ from mdp import sequential_decision_environment
+ north = (0, 1)
+ south = (0,-1)
+ west = (-1, 0)
+ east = (1, 0)
+ policy = {(0, 2): east, (1, 2): east, (2, 2): east, (3, 2): None, (0, 1): north, (2, 1): north,
+ (3, 1): None, (0, 0): north, (1, 0): west, (2, 0): west, (3, 0): west,}
+ q_agent = QLearningAgent(sequential_decision_environment, Ne=5, Rplus=2, alpha=lambda n: 60./(59+n))
+ for i in range(200):
+ run_single_trial(q_agent,sequential_decision_environment)
+
+ q_agent.Q[((0, 1), (0, 1))] >= -0.5
+ True
+ q_agent.Q[((1, 0), (0, -1))] <= 0.5
+ True
+ """
+
+ def __init__(self, mdp, Ne, Rplus, alpha=None):
+
+ self.gamma = mdp.gamma
+ self.terminals = mdp.terminals
+ self.all_act = mdp.actlist
+ self.Ne = Ne # iteration limit in exploration function
+ self.Rplus = Rplus # large value to assign before iteration limit
+ self.Q = defaultdict(float)
+ self.Nsa = defaultdict(float)
+ self.s = None
+ self.a = None
+ self.r = None
+
+ if alpha:
+ self.alpha = alpha
+ else:
+ self.alpha = lambda n: 1. / (1 + n) # udacity video
+
+ def f(self, u, n):
+ """Exploration function. Returns fixed Rplus until
+ agent has visited state, action a Ne number of times.
+ Same as ADP agent in book."""
+ if n < self.Ne:
+ return self.Rplus
+ else:
+ return u
+
+ def actions_in_state(self, state):
+ """Return actions possible in given state.
+ Useful for max and argmax."""
+ if state in self.terminals:
+ return [None]
+ else:
+ return self.all_act
+
+ def __call__(self, percept):
+ s1, r1 = self.update_state(percept)
+ Q, Nsa, s, a, r = self.Q, self.Nsa, self.s, self.a, self.r
+        alpha, gamma, terminals = self.alpha, self.gamma, self.terminals
+ actions_in_state = self.actions_in_state
+
+ if s in terminals:
+ Q[s, None] = r1
+ if s is not None:
+ Nsa[s, a] += 1
+ Q[s, a] += alpha(Nsa[s, a]) * (r + gamma * max(Q[s1, a1]
+ for a1 in actions_in_state(s1)) - Q[s, a])
+ if s in terminals:
+ self.s = self.a = self.r = None
+ else:
+ self.s, self.r = s1, r1
+ self.a = max(actions_in_state(s1), key=lambda a1: self.f(Q[s1, a1], Nsa[s1, a1]))
+ return self.a
+
+ def update_state(self, percept):
+ """To be overridden in most cases. The default case
+ assumes the percept to be of type (state, reward)."""
+ return percept
+
+
+def run_single_trial(agent_program, mdp):
+ """Execute trial for given agent_program
+ and mdp. mdp should be an instance of subclass
+ of mdp.MDP """
+
+ def take_single_action(mdp, s, a):
+ """
+ Select outcome of taking action a
+ in state s. Weighted Sampling.
+ """
+ x = random.uniform(0, 1)
+ cumulative_probability = 0.0
+ for probability_state in mdp.T(s, a):
+ probability, state = probability_state
+ cumulative_probability += probability
+ if x < cumulative_probability:
+ break
+ return state
+
+ current_state = mdp.init
+ while True:
+ current_reward = mdp.R(current_state)
+ percept = (current_state, current_reward)
+ next_action = agent_program(percept)
+ if next_action is None:
+ break
+ current_state = take_single_action(mdp, current_state, next_action)
diff --git a/requirements.txt b/requirements.txt
index c4a6dd78f..dd6b1be8a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1 +1,18 @@
-networkx==1.11
\ No newline at end of file
+cvxopt
+image
+ipython
+ipythonblocks
+ipywidgets
+jupyter
+keras
+matplotlib
+networkx
+numpy
+opencv-python
+pandas
+pillow
+pytest-cov
+qpsolvers
+scipy
+sortedcontainers
+tensorflow
\ No newline at end of file
diff --git a/rl.ipynb b/rl.ipynb
deleted file mode 100644
index 103c32e9e..000000000
--- a/rl.ipynb
+++ /dev/null
@@ -1,590 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {
- "collapsed": false
- },
- "source": [
- "# Reinforcement Learning\n",
- "\n",
- "This IPy notebook acts as supporting material for **Chapter 21 Reinforcement Learning** of the book* Artificial Intelligence: A Modern Approach*. This notebook makes use of the implementations in rl.py module. We also make use of implementation of MDPs in the mdp.py module to test our agents. It might be helpful if you have already gone through the IPy notebook dealing with Markov decision process. Let us import everything from the rl module. It might be helpful to view the source of some of our implementations. Please refer to the Introductory IPy file for more details."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "from rl import *"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "collapsed": true
- },
- "source": [
- "## Review\n",
- "Before we start playing with the actual implementations let us review a couple of things about RL.\n",
- "\n",
- "1. Reinforcement Learning is concerned with how software agents ought to take actions in an environment so as to maximize some notion of cumulative reward. \n",
- "\n",
- "2. Reinforcement learning differs from standard supervised learning in that correct input/output pairs are never presented, nor sub-optimal actions explicitly corrected. Further, there is a focus on on-line performance, which involves finding a balance between exploration (of uncharted territory) and exploitation (of current knowledge).\n",
- "\n",
- "-- Source: [Wikipedia](https://en.wikipedia.org/wiki/Reinforcement_learning)\n",
- "\n",
- "In summary we have a sequence of state action transitions with rewards associated with some states. Our goal is to find the optimal policy (pi) which tells us what action to take in each state."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Passive Reinforcement Learning\n",
- "\n",
- "In passive Reinforcement Learning the agent follows a fixed policy and tries to learn the Reward function and the Transition model (if it is not aware of that).\n",
- "\n"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Passive Temporal Difference Agent\n",
- "\n",
- "The PassiveTDAgent class in the rl module implements the Agent Program (notice the usage of word Program) described in **Fig 21.4** of the AIMA Book. PassiveTDAgent uses temporal differences to learn utility estimates. In simple terms we learn the difference between the states and backup the values to previous states while following a fixed policy. Let us look into the source before we see some usage examples."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "%psource PassiveTDAgent"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "The Agent Program can be obtained by creating the instance of the class by passing the appropriate parameters. Because of the __ call __ method the object that is created behaves like a callable and returns an appropriate action as most Agent Programs do. To instantiate the object we need a policy(pi) and a mdp whose utility of states will be estimated. Let us import a GridMDP object from the mdp module. **Figure 17.1 (sequential_decision_environment)** is similar to **Figure 21.1** but has some discounting as **gamma = 0.9**."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "from mdp import sequential_decision_environment"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {
- "collapsed": false
- },
- "outputs": [
- {
- "data": {
- "text/plain": [
- ""
- ]
- },
- "execution_count": 4,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "sequential_decision_environment"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "**Figure 17.1 (sequential_decision_environment)** is a GridMDP object and is similar to the grid shown in **Figure 21.1**. The rewards in the terminal states are **+1** and **-1** and **-0.04** in rest of the states. Now we define a policy similar to **Fig 21.1** in the book."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "# Action Directions\n",
- "north = (0, 1)\n",
- "south = (0,-1)\n",
- "west = (-1, 0)\n",
- "east = (1, 0)\n",
- "\n",
- "policy = {\n",
- " (0, 2): east, (1, 2): east, (2, 2): east, (3, 2): None,\n",
- " (0, 1): north, (2, 1): north, (3, 1): None,\n",
- " (0, 0): north, (1, 0): west, (2, 0): west, (3, 0): west, \n",
- "}\n"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Let us create our object now. We also use the **same alpha** as given in the footnote of the book on **page 837**."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "our_agent = PassiveTDAgent(policy, sequential_decision_environment, alpha=lambda n: 60./(59+n))"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "The rl module also has a simple implementation to simulate iterations. The function is called **run_single_trial**. Now we can try our implementation. We can also compare the utility estimates learned by our agent to those obtained via **value iteration**.\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "from mdp import value_iteration"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "The values calculated by value iteration:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "metadata": {
- "collapsed": false
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "{(0, 1): 0.3984432178350045, (1, 2): 0.649585681261095, (3, 2): 1.0, (0, 0): 0.2962883154554812, (3, 0): 0.12987274656746342, (3, 1): -1.0, (2, 1): 0.48644001739269643, (2, 0): 0.3447542300124158, (2, 2): 0.7953620878466678, (1, 0): 0.25386699846479516, (0, 2): 0.5093943765842497}\n"
- ]
- }
- ],
- "source": [
- "print(value_iteration(sequential_decision_environment))"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Now the values estimated by our agent after **200 trials**."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 9,
- "metadata": {
- "collapsed": false
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "{(0, 1): 0.4496668011879283, (1, 2): 0.619085803445832, (3, 2): 1, (0, 0): 0.32062531035042224, (2, 0): 0.0, (3, 0): 0.0, (1, 0): 0.235638474671875, (3, 1): -1, (2, 2): 0.7597530664991547, (2, 1): 0.4275522091676434, (0, 2): 0.5333144285450669}\n"
- ]
- }
- ],
- "source": [
- "for i in range(200):\n",
- " run_single_trial(our_agent,sequential_decision_environment)\n",
- "print(our_agent.U)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "We can also explore how these estimates vary with time by using plots similar to **Fig 21.5a**. To do so we define a function to help us with the same. We will first enable matplotlib using the inline backend."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 10,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "%matplotlib inline\n",
- "import matplotlib.pyplot as plt\n",
- "\n",
- "def graph_utility_estimates(agent_program, mdp, no_of_iterations, states_to_graph):\n",
- " graphs = {state:[] for state in states_to_graph}\n",
- " for iteration in range(1,no_of_iterations+1):\n",
- " run_single_trial(agent_program, mdp)\n",
- " for state in states_to_graph:\n",
- " graphs[state].append((iteration, agent_program.U[state]))\n",
- " for state, value in graphs.items():\n",
- " state_x, state_y = zip(*value)\n",
- " plt.plot(state_x, state_y, label=str(state))\n",
- " plt.ylim([0,1.2])\n",
- " plt.legend(loc='lower right')\n",
- " plt.xlabel('Iterations')\n",
- " plt.ylabel('U')"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Here is a plot of state (2,2)."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 11,
- "metadata": {
- "collapsed": false
- },
- "outputs": [
- {
- "data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYcAAAEPCAYAAACp/QjLAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJztnXmYVMXZt+/ZIMIgAygiu8ENxQWDShR13FAxLjEuwS0a\nEzX5fF3eJKLRKEaNJppoVGI04hKjYtRoJC4R8zrghojIIouyB1AEBWSHGaa+P54u+/Q63T3dMz1n\nfvd19dV9tjp1qs+pXz3PU1UHhBBCCCGEEEIIIYQQQgghhBBCCCGEEEIIIYQIJQ8DnwMzUmw/B5gG\nTAfeBvZtonwJIYRoRg4DBpJaHL4NdIz8Ph6Y2BSZEkII0fz0JbU4BOkELC1sVoQQQmRCaXNnIMBF\nwMvNnQkhhBBNQ18athyOBGZh1oMQQohmpry5M4AFof+CxRxWJ9thv/32c9OmTWvSTAkhRAiYBuyf\ny4HN7VbqDfwDOBeYl2qnadOm4ZzTxzluvPHGZs9DsXxUFioLlUX6D7BfrpVzoS2Hp4AjgB2AJcCN\nQEVk2wPADZgr6f7IulrgoALnSQghRAMUWhyGN7D9R5GPEEKIIqK53UoiS6qrq5s7C0WDyiKKyiKK\nyiI/lDR3BjLERfxnQgghMqSkpARyrOdlOQghhEhA4iCEECIBiYMQQogEJA5CCCESkDgIIYRIQOIg\nhBAiAYmDEEKIBCQOQgghEpA4CCGESEDiIIQQIgGJgxBCiAQkDkIIIRKQOAghhEhA4iCEECIBiYMQ\nQogEJA5CCCESkDgIIYRIQOIghBAiAYmDEEKIBCQOQgghEpA4CCGESEDiIIQQIgGJgxBCiAQkDkII\nIRKQOAghhEhA4iCEECKBQovDw8DnwIw0+9wDzAWmAQMLnB8hhBAZUGhxeAQ4Ps32YcCuwG7AxcD9\nBc6PEEKIDCi0OLwJrE6z/WTgscjv94AqYKcC50kIIUQDNHfMoQewJLC8FOjZTHkRQggRobnFAaAk\nbtk1Sy6EEEJ8TXkzn38Z0Cuw3DOyLoGRI0d+/bu6uprq6upC5ksIIVocNTU11NTU5CWt+FZ7IegL\njAX2SbJtGHBZ5HswcHfkOx7nnAwKIYTIhpKSEsixni+05fAUcASwAxZbuBGoiGx7AHgZE4Z5wAbg\nwgLnRwghRAY0heWQD2Q5CCFEljTGciiGgLQQQogiQ+IghBAiAYmDEEKIBCQOQgghEpA4CCGESEDi\nIIQQIgGJgxBCiAQkDkIIIRKQOAghhEhA4iCEECIBiYMQQogEJA5CCCESkDgIIYRIQOIghBAiAYmD\nEEKIBCQOQgghEpA4CCGESEDiIIQQIgGJgxBCiAQkDkIIIRKQOAghhEigvLkzkClXXglffQX33Qf/\n+Ae89hpcdBGsXQsvvQR33gkdOiQ/9uWXbf+1a2HjRigrg7vvhh13tO2bN8Mzz8DkydC5M9x4Y9Nd\nV5C5c+Gjj+C7381/2s5BSUn6fbZtg6lT4eOP4dNP4dJLobIyur2uDubMgVmzYPFi6NgRLr44dVoz\nZ8K8efDf/1rZr1sH69dDba1tr6+Pfn/nO3D22fm73mT5WbDAru3gg6P/vUjOxo32H3/xBaxcCV9+\naf/dhg2w++5w5pnNncPiYts2K5vtt2/unOSPBqqLosEdcohj4UJ44AH44Q/h5JPhgw/g889h+XKr\nVPfeO/HA+++HP/zBKrHOnaFdO/jVr+CJJ6yS2LIFhgyBLl3s+D/8wSrSpmTFCjjvPJgxw65nzZrU\nQhfPE0/AuefCqFHw058mbl++HK6/Hp56yirGnj0T91m3Dn7/exPenXaCffaBiRPhL3+BY4+1iv3m\nm+GvfzVBGDAAdtkF/vQnO7Y80MSYOxfuuMPEdqedYM89oXdvqKoyoamshDZtoLTURLq01ER52TJ4\n9tncyi8VmzfDk0/C00/DW29B166waJH9x1ddld9zZcOKFdbA2WMPOPLI5suHxzn7v//zHyunWbNM\nEHr1sjLbcUd7djp0gIoKePhh214aAr+DczB/Prz3Hrz/PvTvD5dckn7/BQvg3Xdh0iS73+fPNyHd\nutWe365dmy7/DVFiLcKWUs/nhLv3XucGDnRu2DDnfvEL5yZPdg6cu/xy5/bbz7kpU1wCtbXO9elj\n+wYZPNi5t9+233fd5dx3vuNcfb1zy5c7t+OOienkSn29c+PGpd9n82a7rmuucW7rVuc6d3Zu5crM\n0r/jDud22825c85x7ic/Sdw+b55zPXs6d/XVtl+yMlqyxLndd7c05s6Nrr/kEufuu8+5Dz+0NC66\nKHa7c1a2wXV/+5tzO+zg3I03OrdsWWbX4JxzY8c6d8IJme+fCa++avk+4QTnnnnGudWrbf311zv3\n61/n91yZsnKllWunTs7tu69zF16Yn3S3bXPuo4+yP27dOuduv925fv2c69/fuZ/9zLkXXnBu/nzn\n6upSH7fbbs4991zq7W+95dwFF0TLvBiZP9+5ESPs3t95Z+e+9z3nLr3U6oZkvP++c1dcYfdUz57O\nnXGGPX9jxzo3a5ZzGzdaPfTBB5nnYfny7PbPBaCJm7pNj5s2zbkhQ5wbMMC5UaOsUm3Txm7SQYOc\ne++9xIL54AO76eM59FDnJkyw39XV9gc759yKFc516ZK/P+att0zA0nH33SZ49fW23K1bbMX64IP2\n8MczZYpzO+1klftLLzk3dKitnz7dxGDrVucOOMC5P/7R1h96qHNvvhmbRm2tc9/6lnO/+U1i+r//\nvXNnnmkPztNPJ8/7ccc5969/2e+//tXEYvr09NebjP/8x/6HxrBmjXNbttjv++5zrlcvSzee22+3\n8mlqxo2z//bKK+0+e+YZ57773canu3Spc4cfbvfZxo2ZH/fPfzrXvbtzw4fbs+Pvv0z44Q/tfPGN\nhcWLnTvxROf69rXt48dnnmZTMWeOlXuXLs79/OdW6ftrX7rU/qMgr71m5du7t3M33eTczJmp0z7u\nOHsWG2LePOfOO8+5ykp7ZgoJrUEctm2zwt9hB+cee8wu/M47nfvqK+cOOcQq4nieftpaBPEcfrhz\nb7zh3Nq19getX2/rv/jCWu754vzz7SFJ9eDV19uDNHFidF2fPs4tXGi/vXWUzJI4+WTn7r3Xfs+Z\nY60/55z79rftmIcftgrXn/u445x75RX7ff/9Vjb33OPcMcckz9/YsZbONdekvr7LLzcRWbzYHrYZ\nM1Lvm4533nHu4INzO9Y5a6GCcyNHWqXbo0e0DOO5917nfvrT7NKfONG5//mf3PP34otmkb7xRnTd\nuHHOHXVU7mk6Z+W9887O3XKLieGCBQ0fU1/v3HXX2X2W7JnJhK1brbERFN+XX7Zn85ZbTKSHD3fu\n8cdjj1u6NDuLMleWLbPWvHMmfF99ZZbQzTdbHu+4I/rMB9m2zbm2bU1kv/jCudNPd+6b33TuySet\nIdUQP/iBc6NHp96+caNzV11lz8rIk
XaOtm2d27Ahp8uMYeLE5J4BGiEOLSYgXVoK7dtbYMwHSX/2\nM/uuqDB/Xzzz5sGuuyauLyuzANLcudCvn6Xrz1Ffn3meVq82X2yyGIVzMG6c/d62LdYv75k2zfJ+\n8MHRdW3aWBwE4JFH7Lu2Nva45cthwgQYM8aW+/aFJUvMnz5nDrRtCyNHwujR0SB0ZaUFFOvq4Cc/\nsXLZssV838kC1fvsY9/XXZf6+vfay3y1M2damgMGpN43He3aWQA0V0aMsDRee81iL6++amWSjMpK\nCxxmyurVcNZZsN12ueVt9myLkb38Mhx4YHR9x44WW2qIH/3I/v/HHotdP28eHHecxYqGD7dOGZ9+\narGgdPzyl1Y+kyfDDjtkfz1g92yfPrB0qS2PGWMdRl58Eb79bVvXu7d1RPDMmmUxvWOOgRdeiD5z\n+WbqVDj+eOjeHc4/3+qIq6+2Z23jRtveo0fyY0tLLc7y4ot2T33vexZny/S/79bNYg5gZbL77nDA\nAbY8dy6ccgrsv789o77s+/Wzbfvtl9v11tXZs37rrXDiifCvf+WWTjJaVEipfXurdIM9aMBu1vgK\ndNUqCxqlE4fFi+0m92QrDjNmpN42f76dp7w8mubWrVYR++V//9tu5CBt2kSF7rXX7NuLhef55+1G\n8Ddt27YWaL7lFjjtNBg0yMrp6KOjx3hx+M9/rLwWLbKHZNCg5Pnv08fyGV/WQQ480ATw+eetcsiV\n7baDTZtyO3bWLKts3noL3nkHLrgg+kAmo317K4dMufpqE8H4+ysTtmyB738fbrstVhjAAvRffZX+\n+DffNIGfMyd2fW2tCcLVV9s3wM47w2efpU/vySfhuefg9ddzFwZPjx7WiWDCBLj8ckvTCwPEisPS\npXDCCXDXXbZfunvKM3q0CTNY8HvgQBPYk06y/zwZU6aYYN5zj/Wy+tOfrDPC7bdb54jXX08tDJ6+\nfe0e+t3vTHizaRR062YNt5tugnPOsc4iYIHrIUPgiivsPwiW/Z57WkeRdCxaBH/8Y+L6DRtMcCZO\nNEH79NPM85oJLU4cILEnT5s29qesWhVdd801pqL9+iWmExSH3r2j67MVh3R/6rvvwqGH2rl8mn7/\nujr7fvNNqK6OPa5tWxOHJUusZdmvX6JV9NprJg5BBg2y1uXJJ1sL/rTTYi0CLw7//CfccIOtO+OM\n9NfXUNfXffaxB/fkk623V65st13mlsOoUbGi/NvfmjDtv789KCNGpD8+G8th/nwTvltvzU0cRo+2\nSvuiixK3VVWltxy2bLEedpdfbvd3kN/8xiqYyy+Pruve3SqhAQOSW9ELF1rl9PTTjfuvPD17wvTp\nZlX97W+JVmOfPtaz8LbbTCAvucT+p/vvt+3p/u+HHzaL6fXX7Tk9+2xr7Jx2mllI8WIJ9vyfeqrd\nH2eeaSL41lvW+n/tNfsvklnv8fzoR1bR5tJVt1s3K4u//x0efNAs6mnTTNBGj07eC6p/f9svFYsX\nW482/8x6Nm60hmXXrmYJHnGE1S++bgGzDouZ44E5wFwg2WO7A/AqMBX4CLggRTrOOetNAYlBz1NO\nsfW77hpdd9VVzh1xhAWu4xk2zHzqV11l/kfP+vXOtWuXuZ/vqqvsvMkCxtdfb712ttsu6t98/HHb\n3y/H9/ZxLtqT6u9/d+7UU53be+9YX359vQWiFy2KPe6OO5wrKTH/+8qVFqANMmKEc7fdZuecNcu5\niy+2YHZjOeGExgcev/jCevA0RG2tld/Pf27Lq1Y517Fj5r27nLO8Dhliv597Lr0v+YILzDe8dKn5\n9rNh0yaLfUyalHz7li3OlZenjkfddZcFd6dNs3vAs3y5xcX++9/Y/W+91dID5z75JDG9M84wn3u+\nePZZO9eVVybfPm+ebS8ttdhX8BnZay+LiR16qHMPPWSxP8+CBRYXOOEE6yhxyy0WI1y92v73c86x\nThpBtm2zZ33kyPxdXy68/749X//9r/WG6trVlseMSX3MCy84d/zx0eXPP4/+h2vX2n//+9871759\ntPdXXZ1zJ53k3LnnxparSajdUx9+aOVII2IOhbQcyoD7MIHYCxgO9I/b5zLgQ2B/oBr4PWniIN5y\nSOZWAjOrH3rIPuvXm8ndtm2SjGXgVlq40MZRgLVA1q1LTMe3YLZtS9zm4x1By2H6dPuuq7OxAytX\nJvqIvVtp5kxrjfnliRPh9NMtz6WlsRYPwOGHm6VSVWWtyo4dY7dXVsInn1i57LmnteqSjXnIlpdf\ntnM3hkwshyVLov7U7t3t+7nnbBxGNi4SbzksW2atymStUDC3xPPPW2s7VUxr0ybbJxmPPGKukHh3\nkqdNG0s32XXX1poL5sYbLabl3Stg7o5zzjHfeJDu3W3fgw+2fvhBJk2Ct9/O79iOPfc0d9vNNyff\n3q+fVVW//rVZtMExEbvvDpddZu6hH/3Ini+w/S++2Nxlp51mLeK77jL3TFWVjZ/p0cPijkEefNAs\nreuvz9/15cKgQVb2vXqZe2r9erNAzjor9TEHH2xxO1+1n3++rX/rLbjwQjjkEPvf+vSxZx/Mfbxu\nnVkjwXK96y77njzZrPlRoxp3PYUUh4OAecAioBYYA5wSt89ngB9TuD3wJVBHClKJgze727SBH//Y\nbrD161P7Nr04LFsWW0EGxeGYY6L++MMOswFinlmzrGLwPt5kLgcvDqWlUfHwbqXaWkujf3/LSxDv\nVpo50x4+H6AePdoqw6lTzace7/I56CBzU6WistLiA4MHN+wuamq+8Q275nQuvXPPtUqxoiIaM3j+\neavgs8HHHJ56ypZ9ADGeMWPMdVdVlTymBRbrOP302MCr57HHkg9KDJIqKP3ss1a5HHggdOoUdZeu\nWmUul2uuSTzm9NMtnrTfflYhB0Xv17+2gZ/5DALvvbcNPG0ofnDddYkNmT32MJdkTY09A/45GjfO\nGgFXXWUCMmGCBZSDz2jnzrHisHKlXdtDDyU+S82Br6xLS+GNN8wFmI5u3WxU9dy55nJbtcpcpbfe\nao25e++159WLw5tvwp//bIIZ72688koThVNPNVdeY0exF1IcegBLAstLI+uC/AXYG/gUmAZckS7B\nVDEHbzn47/LyzMRh7drYFnYwWBz/0AYDU3vvba0YX7HUxcmZc/Znx1sOvndHXZ2Jx267JebNWwq+\nd4cXC3+uTz6xhytbKivt/AcdlP2xhaa01K5z8+bk29ets9bvxo3WClu/3sR5wgQLdGZDMPbSqVPq\nIN6zz9oDBqnF4bnnLND55JOx6+fNM8vz2GPT5yVVUHrUqGgrv107u382bzZf9tChUcsp/roGDLBt\n77xjlQpYhfLuu9EWaT7JtZFx9dUwdizsu6+J+5Il9sz88pdmiZSX27bTTku0drp0iY0t3nGH3RPJ\nZkdobg46KLM4x+DB1tC44QbrHTV4sAWhH3kk6vno08fuq4svtkB7snsAolPDpLLosqGQXVkz8X
X9\nEos3VAP9gHHAfkCCE2fkyJFMnWo35LvvVnPkkdVfb/Oi4JU0U3FYty52n9LSaLfUtWtjj4kXpNpa\na7W0b58oDn7+oC5dYq2RpUttubY20WrxtGljFcH8+SYeXiyWL7ftn3yS2lWRDn+dxfgQQbQ7a7t2\nidsmTLCeMNdea9c/c6a52QYMSHSfNUT79tZSXb/eKswnnrBW7P77R/f58kv48EOzHiG5OGzcaC3d\nSy9NdHM89ZS12hqqGJIFpT/7zK7Pi15JiYnYtGlWcaTrWgzwzW/at+/9NXq0WVzJyrW56Nw5+rtn\nT3ODjBtnFrK3BKuqTHzj6dIlWt4rVtj1TZtW+DwXksGDTTCvuMLcdbvvblN5DBwY3We//Uw8Bw82\nyyAVgwbVsG5dDbfd1vh8FVIclgFBz2gvzHoIcghwa+T3fGAhsAeQEGcfOXIkzzxjpmj8fDReFOIt\nh1TzE3lxiN8nWJHX1Zlqe7Ho0MFuYO9q2rzZ1pWXJ4rDypWm3iUlUbfSli3mO+7Rw/Zftiz6IAdp\n29a2dehg1koycTjnnOTXlQ4vDnvumf2xTUG67qzvvmtdAY89Nlqx19Qk9vTKBG99Hnqo+cVHjbKy\nnTrVWm4jRph75rDDotZiRYX9Z8HJC1991VqG3bsn+viffdZadw3RrVti91PfTTkYK1u1yiqFnXay\nrprpOO88i2P9/Od2340ebV2mi5Veveya//xn+H//r+H5moLiMGqUWQ35iJ01J4cdZoLphb+0NFYY\nwNzl8+fbeKJ0FtvQodUMHVr99fJNN92Uc74K6VaaDOwG9AXaAGcBL8btMweItM/YCROGuEctSvv2\nySv8fFkOJSXRwBCYL9AHoktKzHc7caItr11rD2s6cfDnqq+3SqBbN3vovTgk63Pdpo1VNn6bjzms\nWGHLs2cnd0c1hC+bZOM+ioF0A+EmTYq6wyor7T/xXYWzxd8rBx0UnSBtt90szZtvNl/9G2/ENkD8\nJIHB//mVV6yLYnDQIljFtWhR7MDGVPTta/t6/vY3y0N8HMVbLbffnpmbon9/u08mTTKrI9fBiU1B\nr14mzDU1mTV6fMyhrs6E79JLC57FgvOtb9l9UFWVep+yMnOhJWtQFopCikMd1hvp38As4GlgNnBJ\n5APwG2AQFm94HbgaWJWQUoT27ZNX+Kksh3TisGGD7eePARMALxBg4uD9m3V1VoF4N8CKFSYOyVwO\nX3wR7UHjLYelS62FU14edSulEoeFC6Pb2ra1eEPbtnbzrFplfeezxV9Tst5bxUAqy8E5M7HjxeGD\nD+yhypWBA6MPY319VPTXrUtuncb/z+PHm+XiY0KeN980F1gmlXjfvvZfg13neeeZFRNvHRx5pMUb\nLrggs2vr0sUaUaNG2VToxcxee1k5XnhhZjMR77ijPUujRtkzsu++Bc9ik/CNbzR3DhIp9PQZr0Q+\nQR4I/P4COCnTxHr3Tt4iy8Vy+Oqr5NuDrqV4cVi7NuoGWLzYWpxLlmRmOXz6qbkg/BQWqcShbVuz\nHA45JHpNixfbsZs2WSWaS6+MoUOjAfFiJJXlsGSJXbNv5XfoYD75tm1zE0mwAPKJJ9p98q9/WYvc\nd6dcsMDOGV/pBMXhs8+s9TpggHVPDorD+PE2ICkTdtklOsXKRx+Zm2v27NgGC8D//V/21zhokMU+\nxo/P/timpKzMeoZlOk3+TjvZ//W//9v4rpoiPS1qhPQuu8Cjjyauj++tVFGR6DIKUlaW+p0J6cRh\n3bqoOHz2mZm4DbmVfHp+Hqby8mgMIVnl5i0H70dt08ZMzp13tvzkWiGWlDQ8dUBzkspymDkzNohe\nWWllF++TzYbhw+0eKSmxCnnFCrNEunQx//zAgYkt/6A4TJpkjRTfyyroVspGHIJupfHjzUKIF4Zc\nGTTIgvWDB+cnvUKTTc+nn/7UnpELLyxcfkQLmngvHfFupfp6u9ni+wF7MrUcOnZMbTl88YUdn4k4\nbNtm4tCpk+Vx9WozI5Plr00ba0F7EfDi0K+fVU7Z9s5pKaSyHOJf4pTvXlddu5o4rF9vvZNeeSX5\ndBdBcZg8OdoxITgX1po11oU51XxV8Xzzm1bJ1daa6+ywwxp/PZ4TT7R7LxP3Vksk10aSyJwWZTmk\nIt6ttHlz+sE56SyHkpLo3DsVFVFx2LjRKgEvDnV1do6GYg7erbRmjfm4y8tNPFIFn3xMwB/ftq25\nlbzlkKp/c0sn3nfvmTXL/NIe/5/lq9eV7066erUNLly7NrZbqyedOHjLYdo06xabqlEST2WluUpn\nzTJxyFRUMmHgQOv6KESuhEoc/Hcm4pDOcvBB5/r6aGvWT2Hgu5RCasvhyy+jk5sFLQc/2nblSrMi\nkuErFr/dTyq4885mNYS1xeQD9fHMm2f9vj357pLru06edVZUkJP17gmKw9SpUQEJipqf8iQbBg2y\n3lGLFxfvGBTROgmF0RmcPgPsYU036MdbDskq2tLSqO9727bo7+AUBn6UbapxDuvWRV80HrQcOnVq\n2HLw1+AHCvnlrl2tBd0/fnaqkJBqFPKCBbHd97xllUt33lRs2mRuPj/oKpnw+PytWmWWpZ/bKGg5\nfPRRbuLwwANmceQr3iBEPgil5RD/O56GLAdvLfhpCyBqOaxZE624U1kOGzbEvkAoF8shXhyqqmxi\nsWznEmop+IFmQTZtMhddMJBeUmKuPR/TyQfx3QiTNSy8OPgAuQ+gBi2HXMThqKOsh1I+XUpC5INQ\niUPQ15uuu6e3HBoSB285lJVFLYfNm6Muo1Qxh2A3Wh/gztRy8C1jLx7xy2El6FaqrTUxXbTI5pSJ\n/y+7dStMHk49NfWLZOLFweMtB+cSt2XCgAF2PRIHUWyEQhzi3UolJel7aZSVmesnWQsx6Faqr7ff\nHTrETpvsW/Wp3EpBcfBupUwtB99TyuctaDmEmaDI/uIXVsYLFzb82st8UlaW2m3n8/fxx7FuJ285\nLF9u944fj5EpJSU25fRJGY/2EaJpCIU4JHMrNSQOmzcndz0lcytVVsbOBBnvVho6NPpKQEi0HLZt\ny9xy8Of2bov4AHVYCYrs1Kn2vWRJ4nTPzYUXh/j3knvLwbuUcpmp9KST8vN2NiHySajEISgIDbmV\namuTdzn04lBSEnUrpbIcvDiAvbDDs2FDrOXgxaFjx4Yth/hXWLYWcQhaDn4aaz+qvBjw+Zs/P/bV\ns8H3b6i3kQgToRAHX4H6IfiZuJUgueVQUmKC4OfR9+Kwfn20B1J8zCGYVm2tfXyswLup/Gja8nKz\nQlINZvMvsoknmxedt0SCAWnflTjVFCPNQUWFWZELF8b2nvKD4FK9n0OIlkooxCFYMYO11BuyHILH\nBfGVebt2lo53K23cGB2AFR9zgKhAeavBuxfKyqzCD07/DFGhiec734l9gY23JIrt7W35JhiQ9uJQ\nbJbDokVmwQXfqObdSk0dHxGi0IRqnIPvdlpbm7vlE
BQHbzn4+EC8OLRvHz2PTyt+wr/SUlvnA8x+\n/1TjMI44InZunoberRwWkrmVis1ymDfP5kMK4t1K8RaFEC2dUFkOwYnbGiMO/o1kwZgDRFv7nTtb\npVBRkWg5NCQO/pyZvpmrNYlDXZ2VuXM29qCYLIc2bSzeEB8gD1oO8cIhREsmtOLQGLeSFwffWyle\nHHr0sPn1g2n472Aw2p8rG8shnu9+F849N7N9WzLerTR/vpXv1q3mXsrnYLfGUFFho7X9yGiPn37j\nG99IP2WLEC2NUIiDH+GaL8shWUAaot/t2sHJJ8eeJ2g5BH3SpaU2piI+5pCpOBxwADz+eGb7tmS8\nW+mjj+yay8qi7+AuBrw4pOpaW6iBeUI0F0Xy6DWOAQPgV7+KxhygsG6lYM+hhmIOjbUcWgverTRj\nhv2f7dtHJ8IrBioqYudUiqdYLBwh8kUoxKGkxLoRBqd8zqdbyVf26cTBfyezHBoTc2gteLfS7Nk2\nwWD79sVV4fr/TeIgWguhEAcwgQhOY5GPcQ6pLIdgxR58wRBEj/Vk21upteLdSitW2Gy5lZXFVeF6\n99YeeyTfnu20GUIUO6ESh23bosv5iDnEi4P/TmY5+HNv3Zo4AWBwnIPfP+yD2rLFu5VWrbLeYMVm\nOfjXeaaohB79AAATFElEQVQKOhdTXoXIB6ERBz+HkW/h5WMQ3JYtJjo+4L399la5J5vDyYvDli3R\n0dE+veAkf36/dPlrjXi3UrGKQ6rZWj3FlFch8kEoBsFB1HIoL7fWez4C0hs2WAvfp9WpU+IEaQ2J\nQ3xAOtmrMEXUreTfoldsbqWjjko95uTBB+GMM5o2P0IUmtCIg7ccKiqsAs5HQHrTpuh8SGCV1Zw5\nsfv7NNJZDhs2RMUh2dvOhJXx+vVWPu3b21QUxTRX0ejRqbf9+MdNlw8hmorQiIMPSMf3HkpGNm6l\ndu1ixzLET7XdUMwh3q0kyyE5FRUWjO7c2f7LUaOaO0dCtG5CF3NINn13PJlaDlu2WDo+raBF4MnU\nreQD0LIcklNRAZ9/Hp23SgjRvIRGHHzMwVf4jXUrBUc0N0Yc4ruyynJITnm5iYNeeiNEcRA6cciH\nW2nLltiup+nEoaGYQ1lZ7NgHiUNyKiqsDGU5CFEchEYc8uVW8iLj4wZBcUj25rhMYg4Q7Q6bLl+t\nGf9fhP1d2UK0FAotDscDc4C5wIgU+1QDHwIfATW5nsgHpPPhVoLk4pDMcvDr0lkOwf1+9zv48MP0\n19Ia8WWc6g15QoimpZDt2DLgPuAYYBnwPvAiMDuwTxUwCjgOWArkPNVaPgPSkLk4fO97MGUKTJgA\nd92VPOYQTK+qCvbfP7Nrak009IY8IUTTkk4cfha37ICVwFvAwgzSPgiYByyKLI8BTiFWHM4GnsOE\nAeCLDNJNSnxAOl/ikOyFPkEqK20a57/+FSZPTu1WSnYuEUXiIERxkc6t1AGoDHw6AAcCrwLDM0i7\nB7AksLw0si7IbkBn4A1gMnBeRrlOQnxAurFupaDIpLMcfHqbN9v5U7mVkgmLiOLLWOIgRHGQznIY\nmWJ9Z+A/wFMNpO0yOH8FcABwNNAOeBeYiMUoYjMzMpqd6upqqqurY7Y3hVspVQWfThzi0xPJ8f+F\nYg5C5E5NTQ01NTV5SSuXmMOqDPdbBgRnv+9F1H3kWYK5kjZFPhOA/WhAHJJRUmLf+ejKCrHiUFFh\ny/4cqdKT5ZA7cisJ0XjiG8433XRTzmnl0lvpSGB1BvtNxtxGfYE2wFlYQDrIP4EhWPC6HXAw0MD8\nl8mJdwflu7dSuso9KA6pYg4Sh/TIrSREcZHOcpiRZF0n4DPg/AzSrgMuA/6NVf6jsWD0JZHtD2Dd\nXF8FpgP1wF/IURx8qz5Tt1JJSXIB8ekExaGqCq68Mn16kN6tpIB0euRWEqK4SCcOJ8UtO+BLYH0W\n6b8S+QR5IG75zsinUfhKONOAdKrKOllvpYoKuPnm9OmB3EqNQZaDEMVFOnFY1FSZyAe+xe+tgoYs\nh4bEIRMLJJgeRN1KCkhnj2IOQhQXoZk+w4tDaalV1ukq9dLShsWhvNx+ZyMO9fVmOcS/JhQkDg0h\ny0GI4iI04uArdS8OjXUrlZVlLw7qypo7bdrAk09q7ikhioXQiIO3HHygubFupVwsBwWkc6ekBIZn\nMrRSCNEkhEYc4i2HdJX6DjvA0KHp0/HWRy4xB7mVhBAtndCIQ3zMIZ1bqWNHeOSR5Nvi3UqZtPj9\nuerq7E1vQSFwLnYfIYRoCYRGHLKxHNIR7PWUreWwaZOJSXAktZ/KWwghWhKhEYeg5ZBprCAZpaWW\nVjbpeHHYsiXR0qiryy0fQgjRnIROHHxAOlc3TlAQsrUctmxJ3F/iIIRoiYRGHPLlVvLH+9/ZiMPW\nrYn7y60khGiJhEYcshkEl46gIOQiDvEWi8RBCNESCY04ZDMIrqF0/LHpxkMESedWkjgIIVoioRGH\nYrAcFHMQQoSF0IlDPgLSQcshG3GorZXlIIQIB6ERh6BbaYcd7B0MuRCc0TVbyyH+N8hyEEK0TEIz\nzVnQrfTWW7mn0xjLAWQ5CCHCQSgth8amk2tXVpA4CCHCQWjEIWg5NIb4gHQ2vZVAAWkhRDgIjTh4\nUQjOa5RrOnIrCSFaO6ERh0JZDgpICyFaIxKHOPJtOdx9N7zySuPyJIQQTU1oeivlMyDdGMshfv/e\nve0jhBAtCVkOSdIJ9lZqbEBaCCFaIqERh3wGpHOdsjv+txBCtFRCIw6FiDnkw60khBAtkdCIQyFi\nDvkISAshREskNOJQCMthxx2hc+eGj8m2d5MQQhQ7oanKCiEO//hHZsd4UWjMu6uFEKKYKLTlcDww\nB5gLjEiz34FAHXBaricqREA6U7bbDk4+uXFThQshRDFRSHEoA+7DBGIvYDjQP8V+vwVeBXKu2gth\nOWRKeTk895wsByFEeCikOBwEzAMWAbXAGOCUJPv9D/AssLIxJytEQDpbFHMQQoSFQopDD2BJYHlp\nZF38PqcA90eWXa4nK8QguGyROAghwkIhq7JMKvq7gWsi+5aQxq00cuTIr39XV1dTXV0ds70QE+9l\ni2IOQojmpKamhpqamrykVUhxWAb0Ciz3wqyHIN/C3E0AOwAnYC6oF+MTC4pDMgoxZXe2yHIQQjQn\n8Q3nm266Kee0ClmVTQZ2A/oCnwJnYUHpIN8M/H4EGEsSYciEYrEcJA5CiDBQyKqsDrgM+DfWI2k0\nMBu4JLL9gXyerBCvCc0WiYMQIiwUuip7JfIJkkoULmzMiZqzK6tHMQchRFgIzfQZ6soqhBD5IzRV\nmbccGhuQ7tULamtzO1aD4IQQYSE0VVm+3Eqn5TyBhywHIUR4kFspj0gchBBhITTikC/LoTEoIC2E\nCAuhEQdZDkIIkT9CIw75Ckg3BomDECIshEYcZDkIIUT+CI04KOYghBD5Q+KQR2Q5CCHCQmjEAUwg\nmlMcNAhO
CBEWQicOCkgLIUTjCZU4lJbKrSSEEPkgVOLQ3G4lBaSFEGFB4pBHKirsI4QQLZ1QOUFK\nS5s35nDnndCzZ/OdXwgh8kWoxKG5LYfdd2++cwshRD4JlVupuQPSQggRFkJVlTa35SCEEGEhVFWp\nLAchhMgPoapKm3sQnBBChIXQiYMsByGEaDyhqkrlVhJCiPwQqqpUloMQQuSHUFWlshyEECI/hKoq\nVUBaCCHyQ6jEQZaDEELkh1BVpYo5CCFEfmiKqvR4YA4wFxiRZPs5wDRgOvA2sG+uJ5I4CCFEfij0\nxHtlwH3AMcAy4H3gRWB2YJ8FwOHAV5iQPAgMzuVkcisJIUR+KHRVehAwD1gE1AJjgFPi9nkXEwaA\n94CcJ71WQFoIIfJDocWhB7AksLw0si4VFwEv53oyWQ5CCJEfCu1WclnseyTwQ+DQXE+mmIMQQuSH\nQovDMqBXYLkXZj3Esy/wFyzmsDpZQiNHjvz6d3V1NdXV1Qn7SByEEK2Zmpoaampq8pJWoT305cDH\nwNHAp8AkYDixAenewP8B5wITU6TjnGvYCNl1V3j2Wdh//8ZkWQghwkGJBWFzqucLbTnUAZcB/8Z6\nLo3GhOGSyPYHgBuATsD9kXW1WCA7axSQFkKI/NBSqtKMLIc99jDLYZ99miBHQghR5DTGcgiVh14x\nByGEyA+hqkrPOAO6d2/uXAghRMsnVG4lIYQQUeRWEkIIkVcK3VtJCCFyonPnzqxenXTYk4ijU6dO\nrFq1Kq9pyq0khChKSkpK0HOfGanKSm4lIYQQeUXiIIQQIgGJgxBCiAQkDkIIIRKQOAghRI5ce+21\n/PGPfyz4ecaOHcv3v//9gp8niMRBCCFyYOXKlTz++ONceumlAEycOJFjjz2WLl260LVrV84880yW\nL1+ecVrDhw+nR48eVFVVMWTIECZNmvT19pNOOomZM2cyY8aMglxLMiQOQgiRA48++ignnngibdu2\nBWDNmjVceumlLF68mMWLF9OhQwcuvPDCjNJav349Bx98MFOmTGH16tX84Ac/4MQTT2TDhg1f7zN8\n+HAefPDBglxLMjTOQQhRlBT7OIejjz6aiy66iLPPPjvp9ilTplBdXc3atWtzSr9jx47U1NQwcOBA\nAN555x3OPfdcFixYkLCvxjkIIUSRMGPGDPbYY4+U2ydMmMCAAQNySnvq1Kls3bqVXXfd9et1e+65\nJ4sWLWL9+vU5pZktmj5DCNFiydfLvXIxUNasWUOHDh2Sbps+fTo333wzL774Ytbprl27lvPOO4+R\nI0fGpO9/r1mzhsrKyuwznCUSByFEi6U5vU6dOnVi3bp1CevnzZvHsGHDuOeeezj00EOzSnPTpk2c\ndNJJHHLIIYwYMSJmmz9XVVVV7pnOArmVhBAiB/bdd18+/vjjmHWLFy/m2GOP5YYbbuCcc87JKr0t\nW7Zw6qmn0rt3bx544IGE7bNnz6Zv375NYjWAxEEIIXJi2LBhjB8//uvlZcuWcdRRR3HZZZdx8cUX\nJ+z/6KOPsssuuyRNq7a2ltNPP5127drx6KOPJt1n/PjxDBs2LC95zwSJgxBC5MD555/Pyy+/zObN\nmwF46KGHWLhw4dexgg4dOrD99tt/vf+SJUsYMmRI0rTeeecdXnrpJcaNG0dVVdXXx7/99ttf7zNm\nzBguueSSwl5UAHVlFUIUJcXelRXguuuuo2vXrlxxxRUN7nvcccdxzz33pO3hlIqxY8fyxBNPMGbM\nmKTbC9GVVeIghChKWoI4FAsa5yCEEKJJkDgIIYRIQOIghBAiAYmDEEKIBCQOQgghEtD0GUKIoqRT\np06+t41ogE6dOuU9zUKX/PHA3UAZ8BDw2yT73AOcAGwELgA+TLKPurIKIUSWFGtX1jLgPkwg9gKG\nA/3j9hkG7ArsBlwM3F/A/ISCmpqa5s5C0aCyiKKyiKKyyA+FFIeDgHnAIqAWGAOcErfPycBjkd/v\nAVXATgXMU4tHN34UlUUUlUUUlUV+KKQ49ACWBJaXRtY1tE/PAuZJCCFEBhRSHDINEsT7wxRcEEKI\nZqaQAenBwEgs5gBwLVBPbFD6z0AN5nICmAMcAXwel9Y8oF+B8imEEGFlPhbXLSrKsYz1BdoAU0ke\nkH458nswMLGpMieEEKL5OAH4GGv5XxtZd0nk47kvsn0acECT5k4IIYQQQggRDo7H4hBzgREN7BsG\nHsbiLTMC6zoD44BPgNew7r6ea7GymQMMbaI8NhW9gDeAmcBHwOWR9a2xPL6BdfWeCswCbousb41l\n4SnDBsyOjSy31rJYBEzHymJSZF3oy6IMczf1BSpIHrMIG4cBA4kVh98BV0d+jwBuj/zeCyuTCqyM\n5hGuubK6AftHfldi7sn+tN7yaBf5Lsdic0NovWUB8L/AE8CLkeXWWhYLMTEIEvqy+DbwamD5msgn\n7PQlVhzmEB0Y2C2yDNYCCFpTr2JB/bDyAnAMKo92wPvA3rTesugJvA4cSdRyaK1lsRDoErcuL2VR\nzKqRySC61sBORLv2fk70T++OlYknzOXTF7Oo3qP1lkcp1ur7nKi7rbWWxV3AL7Cu8Z7WWhYOE8rJ\nwI8j6/JSFsU8K6sGwyXiSF8uYSyzSuA54ApgXdy21lQe9ZibrSPwb6zVHKS1lMV3gBWYj706xT6t\npSwADgU+A3bE4gxz4rbnXBbFbDksw4KSnl7Eql5r4XPMNATYGXswILF8ekbWhYkKTBgex9xK0LrL\nA+Ar4CXgW7TOsjgEm5NtIfAUcBR2f7TGsgATBoCVwPPYnHahL4tMBtGFkb4kBqS9n/AaEoNLbYBd\nsLIK0+T3JcBfMRdCkNZYHjsQ7XGyHTABOJrWWRZBjiAac2iNZdEO6BD53R54G+uB1CrKItkgujDz\nFPApsBWLt1yI9UR4neTd0n6Jlc0c4LgmzWnhGYK5UqZiLoQPsa7NrbE89gGmYGUxHfO3Q+ssiyBH\nEO2t1BrLYhfsnpiKdff2dWRrLAshhBBCCCGEEEIIIYQQQgghhBBCCCGEEEIIIYRoSayPfPcBhuc5\n7V/GLb+d5/SFEEIUCD8nUzXREbWZ0tD8Y/HzPQkhhGgh+Ap8IrAGG219BTa32B3YS1KmARdH9qsG\n3gT+SXQisxewmS8/Ijr75e1AXSS9xyPrvJVSEkl7Bjaq+cxA2jXAM8Bs4G+BfN6OzbY6LXKsEEKI\nAuLFITgXD5gYXBf53RZ7T0JfrAJfj7mhPJ0i39thFb5fjrcc/PL3sKkLSoCuwGJsMrRqTKC6R7a9\ng82s2YXYGTW3z/TihCgExTwrqxD5Jn6SsaHA+VjLfyI2J82ukW2TsArdcwU2h8272MyWuzVwriHA\nk9iUyCuA8cCBkeVJ2BxaLpJmH0wwNgOjge8Cm7K9OCHyicRBtHYuw14kNBDoh01YBrAhsE81Ngvq\nYOydCh9i73VOhyNRjPzc+VsC67ZhU5Nvw6ZbfhZ7Z8GrCNGMSBxEa
2Id0SmOwV6a81OiQefdib6r\nOcj2wGqsZb8nsa9WrCV50PpN4CzsGdsROByzGFJNkdwemz3zFez9yPs1eDVCFJBifhOcEPnCt9in\nYS30qcAjwD1YjGEKVmmvwFw68W/PehW4FJiFTSH/bmDbg1jA+QPgvMBxz2PvQZ8WWfeLSPr9SXz7\nlsNE65+YRVICXJXz1QohhBBCCCGEEEIIIYQQQgghhBBCCCGEEEIIIYQQQgghhBBChJn/D14FxN7T\nQhWsAAAAAElFTkSuQmCC\n",
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
- "source": [
- "agent = PassiveTDAgent(policy, sequential_decision_environment, alpha=lambda n: 60./(59+n))\n",
- "graph_utility_estimates(agent, sequential_decision_environment, 500, [(2,2)])"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "It is also possible to plot multiple states on the same plot."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 12,
- "metadata": {
- "collapsed": false
- },
- "outputs": [
- {
- "data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYcAAAEPCAYAAACp/QjLAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3XlcFuX+//EXIqaIO2LuWJZaxzp+TbOyE9lmlprntJkt\nxzYr7VtZHVt+X8XK3CrNLJfMTHNp0bTSY2aGuea+5EKY+66IgiDr/fn9MYDAAN4gt6C8n48HD++Z\nue65r7mEed8z18w1ICIiIiIiIiIiIiIiIiIiIiIiIiIickEaDxwCNuaxvBuwHtgALAGuOkf1EhGR\nYnQj0IK8w+E6oEr66/bA8nNRKRERKX6h5B0OWVUD9vq2KiIi4o0yxV2BLJ4A5hR3JURE5NwI5cxH\nDjcDm3GOHkREpJiVLe4K4HRCf4rT5xCTW4Grr77a1q9ff04rJSJyAVgP/L0wbyzu00oNgBnAw8C2\nvAqtX78eM9OPGf369Sv2OpSUH7WF2kJtkf8PcHVhd86+PnKYCtwEBAN7gH5AQPqyMUBfnFNJo9Ln\npQCtfVwnERE5A1+HQ9czLH8y/UdEREqQ4j6tJAUUFhZW3FUoMdQWp6ktTlNbFA2/4q6Alyz9/JmI\niHjJz88PCrmf15GDiIi4KBxERMRF4SAiIi4KBxERcVE4iIiIi8JBRERcFA4iIuKicBAREReFg4iI\nuCgcRETEReEgIiIuCgcREXFROIiIiIvCQUREXBQOIiLionAQEREXhYOIiLgoHERExEXhICIiLgoH\nERFxUTiIiIiLwkFERFwUDiIi4qJwEBERF4WDiIi4KBxERMTF1+EwHjgEbMynzAggClgPtPBxfURE\nxAu+DofPgfb5LO8ANAYuA54GRvm4PiIi4gVfh8MiICaf5Z2AL9Jf/w5UBWr5uE4iInIGxd3nUBfY\nk2V6L1CvmOoiIiLpijscAPxyTFux1EJERDKVLebP3wfUzzJdL32ei59feJapsPQfERE5LSL95+zl\n/NbuC6HAD0DzXJZ1AHql/9sGGJ7+b05mpgMKEZGC8PPzg0Lu53195DAVuAkIxulb6AcEpC8bA8zB\nCYZtQDzQ3cf1ERERL5yLI4eioCMHEZECOpsjh5LQIS0iIiWMwkFERFwUDiIi4qJwEBERF4WDiIi4\nKBxERMRF4SAiIi4KBxERcVE4iIiIi8JBRERcFA4iIuKicBAREReFg4iIuCgcRETEReEgIiIuCgcR\nEXFROIiIiIvCQUREXBQOIiLionAQEREXhYOIiLgoHERExEXhICIiLgqHc+xI/BHGrRnHg98+yDu/\nvVOg9+6L3cf0zdMxMx/VTkTEUWrDYdPhTbw09yX2xe7z+WclpyXz5YYvafdFOxp/1Jh5f82jbqW6\nzN8+/4zvjU2KZfSq0Vw77lqaj2rOYzMfY+vRrT6vs8iFxMw4kXiiuKtxXjkvw8HMiEuKK9R7d8Ts\noNuMbrSb2I6V+1fywtwXuHPynURFRxVxLSElLYURv48gdHgoE9ZNoFfrXhx8+SBf3/c1j/39MY4m\nHM3zvdEJ0bzxyxuEDg9l/vb59A/rz6FXDnH7pbez8fDGIq9rQURFR/HU90/RZlybbPPTPGmkelKL\nqVYi2ZkZS3YvoefsnjQc3pCQ90JYvHsxAPHJ8UzeMJkHvn2A5XuXF3NNi1ZyWjJ7Tuw56/Wcd+Fw\nOP4wnad15sbPbyzQ+8yM0atG0+rTVjSp0YRtz29jUpdJbI/ZzoG4AyzavShb+ajoKDYd3lToei7c\nuZDmo5ozO2o2c7rNYf6j8/lns39SIaACAMGBwUSfina9z2Mexq4eS9OPm3I04Shre6zl2/u/pX3j\n9gT4B3BVratYc2ANaZ60QtetsA6dPMSzPz7L9eOvp17lekQdi+JA3AHMjDlRc2g+qjmvzHvlnNdL\nSq79cfuJ2BlxTj/zaMJRBvw2gMs+uoynfniKupXr8tPDPzH9/uk8PONhnvr+KeoNq8fkjZPx9/Nn\nzOoxbDi0gZPJJ89pPYuSmbF0z1Ke+fEZ6rxfh2YfN+Oh6Q8Vd7XOCTMz23hoozUc1tB6z+1tF719\nkaWmpZo34pPjrcu0LtZqbCvbfHiza/ngxYPtpbkvZU5P3zzdgt4Nsi7TupiZWUpayhk/w+PxWGpa\nqqWmpVq/X/vZxe9dbLO2zjKPx5Nr+cSURAt4KyDb8kMnD1m7L9pZq7GtbMPBDbm+79tN3xrh2Ee/\nf5RvfVLTUu3thW/b9M3Tc/3sYcuG2cmkk2fcroxtm7pxqoUMDbHec3vb0fijZmZ228TbbMLaCdZl\nWhdr8lET6zm7p3We2tmrdUrJ5vF47MfIH63t+LYWmxhb4PfvPbHXnp/zvAW9G2Shw0N9UEOzpbuX\nWofJHWztgbVmZrbr+C7rObunVRtUzR6f+bit3LfS9ff33pL3bOCigbYvdp+Zme05scf8+/sb4dj4\nNeO9/uxTKae83v/4UmJKok1YO8FajG5hjUc0tnd/e9d2xuy0FXtX2NAlQw244DsobdL6SRYyNMS+\nXP+lmZk1GNbAtkVvO2PjHT913Fp/2toe++4xS0pNyrXMnD/nGOHY1I1TbdCiQdZgWAObsmGKNRjW\nwDYe2mh13q9jE9dNzPc/6IFvHrCbJ9xsnaZ2spsn3Gz7Y/efsW5B7wbZicQTZma2/uB6azisof2/\nX/5fvr90iSmJ9q+v/mWvzns12/z3lrxn90y7x8zMjsYftdsn3W6tP21ttYbWyhYCe0/stTbj2ph/\nf39bsH1Bnp/j8Xhsf+x+S0xJtH/P/Lc1G9nMVuxdka1Mn5/7WJn+ZezVea9aYkqiLd612NqMa3PG\n7c5pzf41Nv+v+Xlu79d/fJ1nyIr3foz80W754hZLTEnMt1xUdJTd+eWd1uSjJkY4tnr/aq8/IzEl\n0d5Z+I5VH1zdes/tbZFHIy3o3aCzrXo2fx37y+77+j6r+35du/yjy637zO72xvw3rPrg6vbaz695\n9beX1R+H/rBXfnrFBvw2IM8yaZ40W7xrsSWnJtvolaOt5pCadt/X9xXb72VKWoqNXzPeGgxrYLdN\nvM1m/znb0jxprnKU4HBoD2wFooA+uSwPBuYC64A/gH/nsR7z7++f7VvwrRNvte4zu+e5wzczO5l0\n0q4bd509P+f5fP8TD8YdNMKxigMq2uUfXW77YvdZmifNqgysYrWG1rL/zPuPNRzWMNf3JqcmW4fJ\nHeyeaffY1aOutoemP5RvnbJqOKyhbT+23ZbuXmo1h9S0KRumePW+L9d/aV2/7Wpmzi9t77m9rdnI\nZlZxQEXbfmy7XfHxFdZ7bm9LSUux1p+2tsW7FpuZWeTRSKv3QT0b8NsAe2LWE/bJik9c656yYYpd\nPepq6z6zu5V/p7y1GdfG7pl2T65HGbuP77Zle5ZlTkdFR1mj4Y282oYM0zZOs4oDKmaGysp9K+34\nqeNmZnb45GG7cfyNRjiZRytScPHJ8
fbErCfskg8vsSoDq9iOmB25lvN4PDZq5SgLHhJsQ5cMtaTU\nJOs4paN9t+U7rz5n9f7V1nRkU+s4paNtP7Y9c50BbwXYqZRTZ70dKWkpNnDRQKsxuIa9FfGWxSfH\n259H/zT//v72yIxHbM+JPYVe9/Blw63X7F65LttwcIO1GdfGKg6oaCFDQyxsQljm3+zu47u9/oz8\nfoejoqMyj2aySkhOsITkhGzzFmxfYM1GNrMbx9+Y+bedF0poOPgD24BQIAAnAJrlKBMODEx/HQxE\nA2VzWZetO7Au20bfPeVuIxxbtW9V5ryYUzGZrz0ej9339X32yIxHvE73WVtnZfvW8eSsJ23C2gnm\n8Xis0ruVLDohOlt5j8djD8942DpO6WgpaSmWlJpUoG8SLce0tLGrxlrNITXtv1H/9fp9ETsi7Mbx\nN5rH47H/nfO/dt246yw6IdquG3ed1Rhcw8J/Dc8s+9h3j9m41eMs8mik1X2/rn225jMzM3t/6fv2\n/Jzns6134rqJVvu92vbANw/YnV/eaZ+s+MT6Luib6zeS3MQmxlrFARXzXJ6UmpS54zcz+2zNZ1b3\n/bq2ZPcSCxwQaFM2TLGyb5W1YcuG2c6YnXbJh5fYG/PfsOafNC/Qt9eTSSet09RO+X4TLC0ij0ba\nFR9fYd2md7PYxFhrOaalLd+z3FUuLinOukzrYi3HtLStR7Zmzu85u6d9uPzDfD/D4/HYB0s/sOAh\nwTZ5w2TX8trv1T6rHbeZ2c6YndZmXBtr90U72xmzM9uyrH/3hTVt4zS79+t7s81LTUu1QYsGWfCQ\nYBuzaoxFJ0TbL9t/yfwbb/1pa1u6e2m+6117YK1tPrzZ3pj/hvn398/WtmanA6/c2+Ws5+ye2ZbN\n/2u+EY49OetJM3O288lZT1q9D+rZzC0zvdrXUELD4Tqco4IMr6X/ZNUD+Dj99SXAn3msy7XRh08e\ntlZjW9mPkT+amdmfR/80wrH1B9ebmbPzazW2VZF8YzFzduQZ35KTU5Nt6sapNnjxYGv9aWtXsnvr\n9km3W8BbAfb1H18X6H1R0VEWOjzUhiweYs0/aZ75xzFq5SgbvHhwtrIDFw20btO7Wf0P6mcGg5lz\nKu22ibdlTs/YPMPqvF8n1z4Zb3k8HqvwToVcjzKSU5Ptjkl32H1f32dmZmNXjbX6H9S3yKORZmbW\n/JPmFjwk2F77+TW74bMb7NIPL7Xhy4abmVmnqZ1sxuYZXtXhVMopu+WLW+zBbx+0ygMr25H4I4Xe\nnvNNbGKsDVk8JDPMl+5earWG1rIxq8Zk7kg6TO5g32/9Ptv7dh/fbVePutoen/m466h38OLB9vJP\nL1vEjgjbcmSLHTp5KNvypNQke3zm49ZidIvMo4Wcrhp1VWa/gLcy+vAGLx5sn67+1C5+72IbumSo\n119UCmrhzoV2w2c3mJlzaiw6Idpun3S7/ePzf+R5pPXPr/7p+tvdEbPDohOizePx2NAlQy14SLAF\nvBVgHad0tJs+v8m++uOrzLL7Y/db2/Ftrd0X7WziuonW+tPWZua06avzXrU679extxe+bc0/aW5L\ndi+x+h/Ut2d/fDbzVLQ3OItwyO1belGpC2S9nmovcG2OMp8CC4D9QCXgfm9XXrNiTa4MuZIDJw/g\nMQ+Pf/84AJFHIynnX453F73LyqdWUr5s+bPaiAxNgpsQeTSSNvXaMGDRAN7+7W2qla/Gmh5rMq9A\nKqjmIc35R4N/cN+V9xXofXUr1WXPiT18sPwDVj61kqrlqwLwzDXPuMo2DW7K67+8zlthb/F4i8cz\n57eo3YJV+1eRkpbCH4f/oMePPfhvt//SrGbOgzvv+fn5EVIxhEPxh7ik3CUAxCXFMWz5MHYc38Hx\nxOP8cfgP5v01j74RfVnUfRGNqzcGYNCtgwitGkq18tUYtGQQA28ZyAttXgCgYZWG7Dy+M9/P3h+3\nn24zulHGrwzBgcF82eVLWo5tyZ4TewgODC70Np1Le2P3UjuoNv5l/Av0vphTMRhGx6kdWbZnGR2b\ndGT3id10m9GNifdM5M7L7swsW6tiLQ7FH8qc3h6znXZftKNX6168fN3L+Pn5ZVt3gyoNGLR4EB+v\n/JjE1ERevu5l3rv9PQASUhLoPK0zFcpW4LfuvxFULijX+gUHBud72XZWUzdO5f9+/T885iEwIJDy\nZcsTfSqaL7t8yS2X3FKgdimIi4Mu5uDJg0zZOIUnvn+C6hWq0/VvXRl06yDKlsl9N1m3Ul32xTn3\nSSWlJvHrzl+5/5v76dy0MwBbjmxh1VOrCPAPoHZQbfr+2pdNhzcxLHYYY1aPIS45jmdaPsOb/3iT\nhJQEevzYgwNxB+g6vSsVy1Vk/TPrqVq+KkOWDKHLV134rNNn3H353T5rg5x8GQ7eJNYbOKebwoBL\ngZ+BqwHXTQzh4eGZr8PCwggLC6N2UG0OnjzI+LXjSUlLoXeb3kQdi2L478PpH9afRtUaFcV2ANCk\nRhMioyNZtX8Vo1aNYvXTqylbpiwNqjQo9Doz/sgKqkJABa4MuZJhdwyjXuV6+Za9scGNvNvuXV5r\nm/2g7eKgi2lcvTFzoubw8ryX+ejOj2hZp2Wh6pNzvQfiDnBJtUswM7rP6s7v+36ndlBtFjy2gNDh\noTw0/SFmPDAjMxgAOlzWIfN1ZK9ILq9xeeZ0wyoN2XViV56fmZCSQKepnbisxmUEBQTx8V0f41/G\nnzqV6rAvbh8tarc46+3ytQ2HNtB2fFu+uverbDvz/ByJP8LxxOOEfRHG8cTjPHb1Y9SvXJ8Pln3A\nd1u/Y+YDM7mhwQ3Z3lOrYi0OnTzE8r3LmbpxKrMiZ/GfG/7Dc62ey/UzWlzcgtZ1W/N5588ZvWp0\n5v9DQkoCHad2pF7leozvND7fQAsODOZI/JEzbs9naz6jX0Q/+t3Uj7JlyhKXHMez1zxLgH+AV+1x\nNmoH1WbH8R28Nv81Zj4wE8No37h9vu+pV7keu0/s5oX/vsDEDRMpX7Y8E7tM5IFvH6Bzk8781v03\nAgMCM8tfGXIlPX7sQf3K9Xm97evUq1wvM/CCygXRum5rmn7clOeueY532r2T2aZj7h5Dm3ptvNqf\nRUREEBERUfiGOEfakP200uu4O6XnAFl/e38BrsllXbkeMn24/EN7ZMYjFjI0xNYeWGtjVo2xeh/U\nsxajWxT54efsP2fbDZ/dYK3GtrIJaycU6boLoyiukhixfISVf6e8PTHriSKokeOluS/Z7ZNuty/W\nfWEf/f6RXTP2mmyX/fX4oUeBLhk0cy4tvmvyXXkuf/r7p63rt11dbfLkrCdt9MrRBd+Ic2xf7D6r\n/0F9azyisY1ZNcar9+w6vssufu9i8+/vb8OWDbOf//rZPB6PjVg+wgLeCrCftv2U6/uGLRtmHSZ3\nsFpDa1mXaV28/jwzpyP0xvE3Zp4ifGj6Q15dztlzdk8bsXyEmVmep3mnbZxmdd6vY38e/dPr+h
Ql\nj8djr/z0SoE6mCdvmGwV3qlg1427ziJ2RGT2hURFR+W6/9kXu8+enPVktn63rOKT423J7iWF24A8\nUEL7HMoCf+F0SJcj9w7pD4B+6a9r4Zx6qp7LunLd8K//+Nr8+/vbwzMeNrPTHTi/bP+lSBvYzLlq\nIHBAoF376bU+O+95rnk8Hvtuy3de3+/gjdX7V5tfuJ+FDA2xGoNrZPYpnI2dMTut1tBauQbid1u+\ns0bDG+V6Hrbvgr7Wd0Hfs/78onAq5VSuO4Wk1CRrNbaVDfhtgPVd0Nf6/drPVWbW1ll2MO5g5nRi\nSqK1GtvKhiwe4rof5vip465LjrOasmGKEU6BQiHDjpgdVu+DetZzdk9r/2V7r+7/MTN7e+Hb9spP\nr9ifR/+0aoOq2cp9K7Mtn7dtnoUMDcnsLzxfRB6NtIemP2RxSXHFXZU8UULDAeBOIBLnqqXX0+f1\nSP8B5wqlH4D1wEYgr1v6ct3wRbsWmX9/f4uKjjIzp0PuTFdWnI1BiwYVuGOtNNp7Yq+1GtuqyP4v\nPB6Plelfxggn2w7pQNwBqzW0Vp7ftsasGmOEc8YrSnKK2BFhHSZ3KLJr2D0ej5V9q6zdOvFW17I+\nP/exu6fcbR6Px0avHG1Pff9UtuWLdi2yMv3LWP+I/pnzes7uaV2mdSlU/Q7EHch2YUJBpKSlWNm3\nylrTkU3z/Pabm8W7Fluzkc2s2chm1nBYw2w3cP517C8LGRpiC3cuLFSdJH+U4HAoKrlueFJqks3b\nNu8cN7d4Izk1uUjXV/f9ukY4diDuQOa8btO7WZ+f++T5nhmbZ2S7FNAbp1JO2eUfXW6VB1a2X3f8\nWuB6pqalur5Rj1s9zggn80qtDAu2L7Da79XOvALoh8gf7K7Jd1l8crwNXzbcTiadtEs/vNT6/NzH\n/vbJ38zMuUP+kg8vKZLLNwvjxf++WOBTPylpKVZtUDV79sdnbfTK0VZlYBX7dtO3tv7gert61NWZ\np5yk6FFaw0FKj7ikOGs2slnmaZSFOxda/Q/q53tKLDUt1d785U179LtHvf6cfr/2sy7TutjUjVOt\nxuAa9vT3Txeonj1n97TgIcG26/guM3OGRKk5pKYNWjTI7ph0R2a5hOQECx0eanP+nJM5b/X+1UY4\n1vyT5kY4dueXd9rDMx62NE+aVR9c3bYc2WK136t9xhufSqLNhzdbcmqyrdm/xgjHAt4KsCoDq9iz\nPz6ru999iBJ6KatIkQkqF0RIxRCOJBzBzHh53ssMuW0IFctVzPM9/mX8uTn0Zt7+7W2vPiMqOoqR\nK0ay7pl11Ktcj9suuY3QD0MZdfcoyvideYzKDYc28M3mb7i8xuWsPbCWBlUa8NJPL/Hvv/+bsNAw\npm+Znll28JLBtKrTKtuVSXUr1QXgiRZPsPHwRmZHzWbTc5so41eG6+tfT6epnbin6T2uK5DOBxmX\nSLeo3YKjrx7l83Wfc+8V9xJaNbR4KyZ5UjjIeaNmxZocTTjKzK0zSfWkcv+VZ74tpmHVhqzYt4Lp\nm6fzryv+lW/Z//v1/3ipzUuZlwfXCKxBlYuqsPXoVq6oeQUpaSl5XlZpZrw490X63dSPTYc3sevE\nLhbvXszi3YvZ/Nxm9sft59ipY4AzbPzIFSNZ02NNtnXUCqrF7hd3U79KfY6dOsbL171M9QrO9Rlt\n67dlzYE1DLxloOuzzzc1AmvwyvUavbekO++G7JbSK7hCMIfjD9M3oi/v3PyOV9/m61euz6nUU9z7\nzb35llt7YC0Ldy3kxTYvZptfr3I9rvzkShbvXswlIy5h0+FN7D6xO9vT+E4knuDJ75/kSMIRnm75\nNA2rNmTX8V28ueBN+of1p2K5ilSvUJ2YxBgAXv35VV5s82Ku98jUr1IfgOoVqme7IfG5Vs+x8N8L\nqVK+yhm3WaQoKBzkvFGzYk0mrp9IQJmAbDfN5eeishfRsEpDQiqG5Fvurd/e4vW2r7tOU9WuVBuA\n7rO6szd2L/O3z6f5qOZsOLQBgMTURPpF9GP/yf3MuH9G5o2RX2/+mkMnD/HwVQ8DULV8VU4knmDd\nwXUs2bOE3tf1LtC2V7qoUrabBkV8TeEg542agTVZuX9lrsM85GfeI/OofFHlPJdvPbqVJbuX8OT/\nPOla9tW9XzHln1PYdmwb3Zp3I3xhOLFJseyP2w/A83OeZ+GuhYy6axSX1bgMcO7o3hu7l/5h/TOH\nXvAv409QuSBe/flVXr7u5Wx3zoqUROpzkPNGzYo1qVe5nld9DVkFBgSSkJKQ5/L3lr5Hz1Y9c91h\nl/Mvx92X383nnT/n4qCLmbxxMq3qtOJQ/CEOxB3g2y3fEvV8VLbxm66oeQW9WvVyjZlVvUJ11h5Y\ny3cPfFeg+osUB++/fhUvy3qOV0qnuKQ4dh7fSfNazQv0vmOnjnHpiEuJ6RPjWhadEM2lIy5l2/9u\nO+MAffHJ8czYMoONhzdSo0INYpNiOZF0gpEdRnpVj2vGXsM/m/2TN258o0D1Fyms9CPsQu3ndeQg\n541KF1UqcDAAVAyomOeRwxfrv6BTk05ejdxasVxFHrn6Ed5f+j5/xfzFN5u/YdkTy7yux6i7RvG3\nkL95XV6kOKnPQS545fzLkepJJdWTmjnPzDAzxqweQ4+WPfJ5t1vG6aUWF7coUCdxq7qtCj28u8i5\npnCQC56fn1+2foeInRHcOulWFu1eRECZAK6vf32B1lcrqBaxSbE83fJpX1RXpERQOEipkDUcJqyb\nwO97f2fS+kk8evWjBbryCZwH4NQOqk2nJp18UVWREkEd0lIqNPqwEb88+gu1g2pT54M6+OHHqdRT\nRPaKLNQDm+KT4/MdukOkJFCHtMgZZHRKz46azTV1riGgTAAnk08W+kl+Cga50CkcpFTIOK309aav\nefDKBzEsc9wiEXFTOEipEBgQSMypGOb9NY+RHUaecTgNkdJOHdJSKgQGBDI7ajZXhlypYBDxgsJB\nSoXAgECm/jGVzk06F3dVRM4LCgcpNY4mHNXlpyJeUjhIqbA3di/VylejaXDT4q6KyHlB9zlIqRBz\nKoYKARUoX7Z8cVdF5Jw5m/scFA4iIheoswkHnVYSEREXhYOIiLgoHERExEXhICIiLgoHERFxUTiI\niIiLr8OhPbAViAL65FEmDFgL/AFE+Lg+IiLiBV/e5+APRAK3AvuAlUBXYEuWMlWBJcAdwF4gGDia\ny7p0n4OISAH56mE/L+eYNuAIsBjY4cW6WwPbgJ3p09OAzmQPh4eA6TjBALkHg4iInGP5nVaqBARl\n+akEtALm4hwBnEldYE+W6b3p87K6DKgO/AqsAh7xqtYiIuJT+R05hOcxvzrwCzD1DOv25jxQAPA/\nwC1AILAMWI7TR5G9MuGnqxMWFkZYWJgXqxcRKT0iIiKIiIgoknUVts9hLdDiDGXa4ARM+/Tp1wEP\nMDhLmT5ABU4H0TicI5Nvc6xLfQ4iIgV0rsdWuhmI8
aLcKpzTRqFAOeAB4PscZWYBbXE6rwOBa4HN\nhaiTiIgUofxOK23MZV414ADwqBfrTgV6AT/h7Pw/w+mM7pG+fAzOZa5zgQ04RxWfonAQESl2+R1u\nhOaYNiAaOOmz2uRNp5VERApIz3MQEREXPc9BRESKlMJBRERcFA4iIuKicBAREReFg4iIuCgcRETE\nReEgIiIuCgcREXFROIiIiIvCQUREXBQOIiLionAQEREXhYOIiLgoHERExEXhICIiLgoHERFxUTiI\niIiLwkFERFwUDiIi4qJwEBERF4WDiIi4KBxERMRF4SAiIi4KBxERcVE4iIiIi8JBRERcFA4iIuLi\n63BoD2wFooA++ZRrBaQC//RxfURExAu+DAd/YCROQFwBdAWa5VFuMDAX8PNhfURExEu+DIfWwDZg\nJ5ACTAM651LueeBb4IgP6yIiIgXgy3CoC+zJMr03fV7OMp2BUenT5sP6iIiIl8r6cN3e7OiHA6+l\nl/Ujn9NK4eHhma/DwsIICws7u9qJiFxgIiIiiIiIKJJ1+fIcfxsgHKfPAeB1wIPTv5Bhe5Y6BAMJ\nwFPA9znWZWY6qBARKQg/Pz8o5H7el+FQFogEbgH2AytwOqW35FH+c+AHYEYuyxQOIiIFdDbh4MvT\nSqlAL+BhZPYoAAAMJklEQVQnnCuSPsMJhh7py8f48LNFROQsnC+XjurIQUSkgM7myEF3SIuIiIvC\nQUREXBQOIiLionAQEREXhYOIiLgoHERExEXhICIiLgoHERFxUTiIiIiLwkFERFwUDiIi4qJwEBER\nF4WDiIi4KBxERMRF4SAiIi4KBxERcVE4iIiIi8JBRERcFA4iIuKicBAREReFg4iIuCgcRETEpWxx\nV0BEJDfVq1cnJiamuKtxXqhWrRrHjh0r0nX6FenafMfMrLjrICLnkJ+fH/q7905ebeXn5weF3M/r\ntJKIiLgoHERExEXhICIiLgoHERFxORfh0B7YCkQBfXJZ3g1YD2wAlgBXnYM6iYictddff50PP/zQ\n55/zww8/8OCDD/r8c7LydTj4AyNxAuIKoCvQLEeZ7cA/cELhbWCsj+skInLWjhw5wqRJk3jmmWcA\n2Lx5M9dccw3Vq1enatWq3HDDDSxevNjrdXXt2pW6detStWpV2rZty4oVKzKXd+zYkU2bNrFx40af\nbEtufB0OrYFtwE4gBZgGdM5RZhlwIv3170A9H9dJROSsTZgwgbvuuouLLroIgLp16/LNN98QHR1N\nTEwMDz74IPfee69X6zp58iTXXnsta9asISYmhscee4y77rqL+Pj4zDJdu3Zl7Nhz993Z1+FQF9iT\nZXpv+ry8PAHM8WmNRESKwNy5c7npppsyp6tUqUKjRo3w8/MjLS2NMmXKULt2ba/W1ahRI1588UVq\n1aqFn58fTz31FMnJyfz555+ZZcLCwpg9e3aRb0defH2HdEHuYLkZeBy4wUd1EREpMhs3bqRJkyau\n+VWrViU+Pp46deqwYMGCQq173bp1JCcn07hx48x5TZs2ZefOnZw8eZKgoKBC19tbvg6HfUD9LNP1\ncY4ecroK+BSnbyLX++XDw8MzX4eFhREWFlZUdRSR85RfEY3xUJgbsY8fP06lSpVynZ+QkED//v25\n7777WL16dcadyl6JjY3lkUceITw8PNv6M14fP348z3CIiIggIiKiYBuSB18Pn1EWiARuAfYDK3A6\npbdkKdMAWAA8DCzPYz0aPkOklCnpw2fUqlWLOXPm0LJly1yXmxmVKlVi6dKlXHWVdxdhnjp1ivbt\n29O0aVPGjBmTbdmxY8cIDg4mNjbWFQ7n4/AZqUAv4CdgM/AVTjD0SP8B6AtUA0YBa3ECRESkRLvq\nqquIjIzMc3laWhoej4fAwECv1peUlMQ999xDgwYNXMEAsGXLFkJDQ8/JKSU4N/c5/BdoAjQGBqbP\nG5P+A/AkUANokf7T+hzUSUTkrHTo0IGFCxdmTs+fP59169aRlpZGbGwsvXv3pkmTJpn9BhMmTKBR\no0a5rislJYV7772XwMBAJkyYkGuZhQsX0qFDhyLfjrzoDmkRkUJ49NFHmTNnDomJiYDTF9C1a1eq\nVq1KkyZNOHLkCN9//31m+T179tC2bdtc17V06VJmz57Nzz//TNWqValUqRKVKlViyZIlmWWmTZtG\njx49cn2/L2jIbhEpkUp6nwPAm2++SUhICC+88MIZy95xxx2MGDEi1yuczuSHH35g8uTJTJs2Ldfl\nvuhzUDiISIl0PoRDSXE+dkiLiMh5SOEgIiIuCgcREXFROIiIiIvCQUREXBQOIiLionAQEREXhYOI\nSCHpMaEiIpJNzseELl++nNtuu40aNWoQEhLC/fffz8GDB71eV2l7TKiIyAUp52NCjx8/zjPPPMOu\nXbvYtWsXlSpVonv37l6tqyQ+JlTDZ4hIiVTSh8+45ZZbeOKJJ3jooYdyXb5mzRrCwsKIjY0t1Pqr\nVKlCREQELVq0AJzB+R5++GG2b9/uKqvhM0RESoi8HhOa4bfffuNvf/tbodZ9pseEngu+fkyoiIjP\n+PUvmpMf1q/gRyh5PSYUYMOGDbz99tvZhuz21tk8JrQoKRxE5LxVmJ16UalWrRpxcXGu+du2baND\nhw6MGDGCG264oUDrPHXqFB07duT666+nT58+2ZZlfFbVqlULX+kC0GklEZFCyO0xobt27eK2226j\nb9++dOvWrUDrK42PCRURueDkfEzovn37aNeuHb169eLpp592lddjQkVESoGcjwkdN24cO3bsyOwr\nqFSpEpUrV84sr8eE+oYuZRUpZUr6paygx4SWBAoHkVLmfAiHkkL3OYiIyDmhcBAREReFg4iIuCgc\nRETEReEgIiIuGj5DREqkatWqZVxtI2dQrVq1Il+nr1u+PTAc8AfGAYNzKTMCuBNIAP4NrM2ljC5l\nFREpoJJ6Kas/MBInIK4AugLNcpTpADQGLgOeBkb5sD4XhIiIiOKuQomhtjhNbXGa2qJo+DIcWgPb\ngJ1ACjAN6JyjTCfgi/TXvwNVgVo+rNN5T7/4p6ktTlNbnKa2KBq+DIe6wJ4s03vT552pTD0f1klE\nRLzgy3DwtpMg5/kwdS6IiBQzX3ZItwHCcfocAF4HPGTvlB4NROCccgLYCtwEHMqxrm3ApT6qp4jI\nheovnH7dEqUsTsVCgXLAOnLvkJ6T/roNsPxcVU5ERIrPnUAkzjf/19Pn9Uj/yTAyffl64H/Oae1E\nREREROTC0B6nHyIK6HOGsheC8Tj9LRuzzKsO/Az8CczDudw3w+s4bbMVuP0c1fFcqQ/8CmwC/gD+\nN31+aWyP8jiXeq8DNgMD0+eXxrbI4I9zw+wP6dOltS12Ahtw2mJF+rwLvi38cU43hQIB5N5ncaG5\nEWhB9nAYAvwn/XUfYFD66ytw2iQAp422cWGNlXUx8Pf010E4pyebUXrbIzD937I4fXNtKb1tAdAb\nmAx8nz5d
WttiB04YZHXBt8V1wNws06+l/1zoQskeDls5fWPgxenT4HwDyHo0NRenU/9CNRO4FbVH\nILASuJLS2xb1gPnAzZw+ciitbbEDqJFjXpG0RUlODW9uoisNanH60t5DnP5Pr4PTJhku5PYJxTmi\n+p3S2x5lcL71HeL06bbS2hbDgFdxLo3PUFrbwnCCchXwVPq8ImmLkjwqq26GczPyb5cLsc2CgOnA\nC0BcjmWlqT08OKfZqgA/4Xxrzqq0tMXdwGGcc+xheZQpLW0BcANwAKiJ08+wNcfyQrdFST5y2IfT\nKZmhPtlTr7Q4hHNoCFAb5w8D3O1TL33ehSQAJxgm4ZxWgtLdHgAngNlAS0pnW1yPMybbDmAq0A7n\n96M0tgU4wQBwBPgOZ0y7C74tvLmJ7kIUirtDOuM84Wu4O5fKAY1w2upCGvzeD5iIcwohq9LYHsGc\nvuKkAvAbcAulsy2yuonTfQ6lsS0CgUrprysCS3CuQCoVbZHbTXQXsqnAfiAZp7+lO86VCPPJ/bK0\nN3DaZitwxzmtqe+1xTmVsg7nFMJanEubS2N7NAfW4LTFBpzz7VA62yKrmzh9tVJpbItGOL8T63Au\n987YR5bGthARERERERERERERERERERERERERERE5n5xM/7ch0LWI1/1GjuklRbx+ERHxkYwxmcI4\nfUett840/ljO8Z5EROQ8kbEDXw4cx7nb+gWcscWG4jwkZT3wdHq5MGARMIvTA5nNxBn58g9Oj345\nCEhNX9+k9HkZRyl+6eveiHNX8/1Z1h0BfANsAb7MUs9BOKOtrk9/r4iI+FBGOGQdiwecMHgz/fVF\nOM9JCMXZgZ/EOQ2VoVr6vxVwdvgZ0zmPHDKm/4UzdIEfEALswhkMLQwnoOqkL1uKM7JmDbKPqFnZ\n240T8YWSPCqrSFHLOcjY7cCjON/8l+OMSdM4fdkKnB16hhdwxrBZhjOy5WVn+Ky2wBScIZEPAwuB\nVunTK3DG0LL0dTbECYxE4DOgC3CqoBsnUpQUDlLa9cJ5kFAL4FKcAcsA4rOUCcMZBbUNzjMV1uI8\n1zk/hjuMMsbOT8oyLw1naPI0nOGWv8V5ZsFcRIqRwkFKkzhOD3EMzkNznuN0p/PlnH5Wc1aVgRic\nb/ZNyf5oxRRy77ReBDyA8zdWE/gHzhFDXkMkV8QZPfO/OM9HvvqMWyPiQyX5SXAiRSXjG/t6nG/o\n64DPgRE4fQxrcHbah3FO6eR8etZc4BlgM84Q8suyLBuL0+G8Gngky/u+w3kO+vr0ea+mr78Z7qdv\nGU5ozcI5IvEDXir01oqIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIhcyP4/S815E0SFY3EAAAAA\nSUVORK5CYII=\n",
- "text/plain": [
- "