diff --git a/README.md b/README.md index 8bac287b6..a7b5d1667 100644 --- a/README.md +++ b/README.md @@ -112,26 +112,26 @@ Here is a table of algorithms, the figure, name of the algorithm in the book and | 10.3 | Three-Block-Tower | `three_block_tower` | [`planning.py`][planning] | Done | Included | | 10.7 | Cake-Problem | `have_cake_and_eat_cake_too` | [`planning.py`][planning] | Done | Included | | 10.9 | Graphplan | `GraphPlan` | [`planning.py`][planning] | Done | Included | -| 10.13 | Partial-Order-Planner | | | | | -| 11.1 | Job-Shop-Problem-With-Resources | `job_shop_problem` | [`planning.py`][planning] | Done | | +| 10.13 | Partial-Order-Planner | `PartialOrderPlanner` | [`planning.py`][planning] | Done | Included | +| 11.1 | Job-Shop-Problem-With-Resources | `job_shop_problem` | [`planning.py`][planning] | Done | Included | | 11.5 | Hierarchical-Search | `hierarchical_search` | [`planning.py`][planning] | | | | 11.8 | Angelic-Search | | | | | -| 11.10 | Doubles-tennis | `double_tennis_problem` | [`planning.py`][planning] | | | +| 11.10 | Doubles-tennis | `double_tennis_problem` | [`planning.py`][planning] | Done | Included | | 13 | Discrete Probability Distribution | `ProbDist` | [`probability.py`][probability] | Done | Included | | 13.1 | DT-Agent | `DTAgent` | [`probability.py`][probability] | | | | 14.9 | Enumeration-Ask | `enumeration_ask` | [`probability.py`][probability] | Done | Included | | 14.11 | Elimination-Ask | `elimination_ask` | [`probability.py`][probability] | Done | Included | -| 14.13 | Prior-Sample | `prior_sample` | [`probability.py`][probability] | | Included | +| 14.13 | Prior-Sample | `prior_sample` | [`probability.py`][probability] | Done | Included | | 14.14 | Rejection-Sampling | `rejection_sampling` | [`probability.py`][probability] | Done | Included | | 14.15 | Likelihood-Weighting | `likelihood_weighting` | [`probability.py`][probability] | Done | Included | | 14.16 | Gibbs-Ask | `gibbs_ask` | [`probability.py`][probability] | 
Done | Included | -| 15.4 | Forward-Backward | `forward_backward` | [`probability.py`][probability] | Done | | -| 15.6 | Fixed-Lag-Smoothing | `fixed_lag_smoothing` | [`probability.py`][probability] | Done | | -| 15.17 | Particle-Filtering | `particle_filtering` | [`probability.py`][probability] | Done | | -| 16.9 | Information-Gathering-Agent | | | | | +| 15.4 | Forward-Backward | `forward_backward` | [`probability.py`][probability] | Done | Included | +| 15.6 | Fixed-Lag-Smoothing | `fixed_lag_smoothing` | [`probability.py`][probability] | Done | Included | +| 15.17 | Particle-Filtering | `particle_filtering` | [`probability.py`][probability] | Done | Included | +| 16.9 | Information-Gathering-Agent | `InformationGatheringAgent` | [`probability.py`][probability] | Done | Included | | 17.4 | Value-Iteration | `value_iteration` | [`mdp.py`][mdp] | Done | Included | | 17.7 | Policy-Iteration | `policy_iteration` | [`mdp.py`][mdp] | Done | Included | -| 17.9 | POMDP-Value-Iteration | | | | | +| 17.9 | POMDP-Value-Iteration | `pomdp_value_iteration` | [`mdp.py`][mdp] | Done | Included | | 18.5 | Decision-Tree-Learning | `DecisionTreeLearner` | [`learning.py`][learning] | Done | Included | | 18.8 | Cross-Validation | `cross_validation` | [`learning.py`][learning] | | | | 18.11 | Decision-List-Learning | `DecisionListLearner` | [`learning.py`][learning]\* | | | @@ -147,7 +147,7 @@ Here is a table of algorithms, the figure, name of the algorithm in the book and | 22.1 | HITS | `HITS` | [`nlp.py`][nlp] | Done | Included | | 23 | Chart-Parse | `Chart` | [`nlp.py`][nlp] | Done | Included | | 23.5 | CYK-Parse | `CYK_parse` | [`nlp.py`][nlp] | Done | Included | -| 25.9 | Monte-Carlo-Localization | `monte_carlo_localization` | [`probability.py`][probability] | Done | | +| 25.9 | Monte-Carlo-Localization | `monte_carlo_localization` | [`probability.py`][probability] | Done | Included | # Index of data structures diff --git a/images/pop.jpg b/images/pop.jpg new file mode 100644 
index 000000000..52b3e3756 Binary files /dev/null and b/images/pop.jpg differ diff --git a/mdp.ipynb b/mdp.ipynb index aa74514e0..b9952f528 100644 --- a/mdp.ipynb +++ b/mdp.ipynb @@ -4,9 +4,10 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Markov decision processes (MDPs)\n", + "# Making Complex Decisions\n", + "---\n", "\n", - "This IPy notebook acts as supporting material for topics covered in **Chapter 17 Making Complex Decisions** of the book* Artificial Intelligence: A Modern Approach*. We makes use of the implementations in mdp.py module. This notebook also includes a brief summary of the main topics as a review. Let us import everything from the mdp module to get started." + "This Jupyter notebook acts as supporting material for topics covered in **Chapter 17 Making Complex Decisions** of the book* Artificial Intelligence: A Modern Approach*. We make use of the implementations in mdp.py module. This notebook also includes a brief summary of the main topics as a review. Let us import everything from the mdp module to get started." ] }, { @@ -16,7 +17,7 @@ "outputs": [], "source": [ "from mdp import *\n", - "from notebook import psource, pseudocode" + "from notebook import psource, pseudocode, plot_pomdp_utility" ] }, { @@ -30,7 +31,10 @@ "* Grid MDP\n", "* Value Iteration\n", " * Value Iteration Visualization\n", - "* Policy Iteration" + "* Policy Iteration\n", + "* POMDPs\n", + "* POMDP Value Iteration\n", + " - Value Iteration Visualization" ] }, { @@ -2170,6 +2174,769 @@ "For in-depth knowledge about sequential decision problems, refer **Section 17.1** in the AIMA book." 
] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## POMDP\n", + "---\n", + "Partially Observable Markov Decision Problems\n", + "\n", + "To recap, a Markov decision process or MDP is defined as:\n", + "- a sequential decision problem for a fully observable, stochastic environment with a Markovian transition model and additive rewards.\n", + "\n", + "An MDP consists of a set of states (with an initial state $s_0$); a set $A(s)$ of actions\n", + "in each state; a transition model $P(s' | s, a)$; and a reward function $R(s)$.\n", + "\n", + "The MDP seeks to make sequential decisions to occupy states so as to maximise some combination of the reward function $R(s)$.\n", + "\n", + "The characteristic problem of the MDP is hence to identify the optimal policy function $\\pi^*(s)$ that provides the _utility-maximising_ action $a$ to be taken when the current state is $s$.\n", + "\n", + "### Belief vector\n", + "\n", + "**Note**: The book refers to the _belief vector_ as the _belief state_. We use the former terminology here to retain our ability to refer to the belief vector as a _probability distribution over states_.\n", + "\n", + "The solution of an MDP is subject to certain properties of the problem which are assumed and justified in [Section 17.1]. One critical assumption is that the agent is **fully aware of its current state at all times**.\n", + "\n", + "A tedious (but rewarding, as we will see) way of expressing this is in terms of the **belief vector** $b$ of the agent. The belief vector is a function mapping states to probabilities or certainties of being in those states.\n", + "\n", + "Consider an agent that is fully aware that it is in state $s_i$ in the statespace $(s_1, s_2, ... s_n)$ at the current time.\n", + "\n", + "Its belief vector is the vector $(b(s_1), b(s_2), ... 
b(s_n))$ given by the function $b(s)$:\n", + "\\begin{align*}\n", + "b(s) &= 0 \\quad \\text{if }s \\neq s_i \\\\ &= 1 \\quad \\text{if } s = s_i\n", + "\\end{align*}\n", + "\n", + "Note that $b(s)$ is a probability distribution that necessarily sums to $1$ over all $s$.\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true + }, + "source": [ + "### POMDPs - a conceptual outline\n", + "\n", + "The POMDP really has only two modifications to the **problem formulation** compared to the MDP.\n", + "\n", + "- **Belief state** - In the real world, the current state of an agent is often not known with complete certainty. This makes the concept of a belief vector extremely relevant. It allows the agent to represent different degrees of certainty with which it _believes_ it is in each state.\n", + "\n", + "- **Evidence percepts** - In the real world, agents often have certain kinds of evidence, collected from sensors. They can use the probability distribution of observed evidence, conditional on state, to consolidate their information. This is a known distribution $P(e\\ |\\ s)$ - $e$ being a piece of evidence, and $s$ being the state it is conditional on.\n", + "\n", + "Consider the world we used for the MDP. \n", + "\n", + "![title](images/grid_mdp.jpg)\n", + "\n", + "#### Using the belief vector\n", + "An agent beginning at $(1, 1)$ may not be certain that it is indeed in $(1, 1)$. Consider a belief vector $b$ such that:\n", + "\\begin{align*}\n", + " b((1,1)) &= 0.8 \\\\\n", + " b((2,1)) &= 0.1 \\\\\n", + " b((1,2)) &= 0.1 \\\\\n", + " b(s) &= 0 \\quad \\quad \\forall \\text{ other } s\n", + "\\end{align*}\n", + "\n", + "By horizontally concatenating each row, we can represent this as an 11-dimensional vector (omitting $(2, 2)$).\n", + "\n", + "Thus, taking $s_1 = (1, 1)$, $s_2 = (2, 1)$, ... 
$s_{11} = (4,3)$, we have $b$:\n", + "\n", + "$b = (0.8, 0.1, 0, 0, 0.1, 0, 0, 0, 0, 0, 0)$ \n", + "\n", + "This fully represents the certainty to which the agent is aware of its state.\n", + "\n", + "#### Using evidence\n", + "The evidence observed here could be the number of adjacent 'walls' or 'dead ends' observed by the agent. We assume that the agent cannot 'orient' the walls - only count them.\n", + "\n", + "In this case, $e$ can take only two values, 1 and 2. This gives $P(e\\ |\\ s)$ as:\n", + "\\begin{align*}\n", + " P(e=2\\ |\\ s) &= \\frac{1}{7} \\quad \\forall \\quad s \\in \\{s_1, s_2, s_4, s_5, s_8, s_9, s_{11}\\}\\\\\n", + " P(e=1\\ |\\ s) &= \\frac{1}{4} \\quad \\forall \\quad s \\in \\{s_3, s_6, s_7, s_{10}\\} \\\\\n", + " P(e\\ |\\ s) &= 0 \\quad \\forall \\quad \\text{ other } s, e\n", + "\\end{align*}\n", + "\n", + "Note that the implications of the evidence on the state must be known **a priori** to the agent. Ways of reliably learning this distribution from percepts are beyond the scope of this notebook." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### POMDPs - a rigorous outline\n", + "\n", + "A POMDP is thus a sequential decision problem for a *partially* observable, stochastic environment with a Markovian transition model, a known 'sensor model' for inferring state from observation, and additive rewards. 
\n", + "\n", + "Practically, a POMDP has the following, which an MDP also has:\n", + "- a set of states, each denoted by $s$\n", + "- a set of actions available in each state, $A(s)$\n", + "- a reward accrued on attaining some state, $R(s)$\n", + "- a transition probability $P(s'\\ |\\ s, a)$ of action $a$ changing the state from $s$ to $s'$\n", + "\n", + "And the following, which an MDP does not:\n", + "- a sensor model $P(e\\ |\\ s)$ on evidence conditional on states\n", + "\n", + "Additionally, the POMDP is now uncertain of its current state hence has:\n", + "- a belief vector $b$ representing the certainty of being in each state (as a probability distribution)\n", + "\n", + "\n", + "#### New uncertainties\n", + "\n", + "It is useful to intuitively appreciate the new uncertainties that have arisen in the agent's awareness of its own state.\n", + "\n", + "- At any point, the agent has belief vector $b$, the distribution of its believed likelihood of being in each state $s$.\n", + "- For each of these states $s$ that the agent may **actually** be in, it has some set of actions given by $A(s)$.\n", + "- Each of these actions may transport it to some other state $s'$, assuming an initial state $s$, with probability $P(s'\\ |\\ s, a)$\n", + "- Once the action is performed, the agent receives a percept $e$. $P(e\\ |\\ s)$ now tells it the chances of having perceived $e$ for each state $s$. 
The agent must use this information to update its new belief state appropriately.\n", + "\n", + "#### Evolution of the belief vector - the `FORWARD` function\n", + "\n", + "The new belief vector $b'(s')$ after an action $a$ on the belief vector $b(s)$ and the noting of evidence $e$ is:\n", + "$$ b'(s') = \\alpha P(e\\ |\\ s') \\sum_s P(s'\\ |\\ s, a) b(s)$$ \n", + "\n", + "where $\\alpha$ is a normalising constant (to retain the interpretation of $b$ as a probability distribution).\n", + "\n", + "This equation just counts the sum of likelihoods of going to a state $s'$ from every possible state $s$, times the initial likelihood of being in each $s$. This is multiplied by the likelihood that the known evidence actually implies the new state $s'$. \n", + "\n", + "This function is represented as `b' = FORWARD(b, a, e)`\n", + "\n", + "#### Probability distribution of the evolving belief vector\n", + "\n", + "The goal here is to find $P(b'\\ |\\ b, a)$ - the probability that action $a$ transforms belief vector $b$ into belief vector $b'$. 
The following steps illustrate this -\n", + "\n", + "The probability of observing evidence $e$ when action $a$ is enacted on belief vector $b$ can be distributed over each possible new state $s'$ resulting from it:\n", + "\\begin{align*}\n", + " P(e\\ |\\ b, a) &= \\sum_{s'} P(e\\ |\\ b, a, s') P(s'\\ |\\ b, a) \\\\\n", + " &= \\sum_{s'} P(e\\ |\\ s') P(s'\\ |\\ b, a) \\\\\n", + " &= \\sum_{s'} P(e\\ |\\ s') \\sum_s P(s'\\ |\\ s, a) b(s)\n", + "\\end{align*}\n", + "\n", + "The probability of getting belief vector $b'$ from $b$ by application of action $a$ can thus be summed over all possible evidences $e$:\n", + "\\begin{align*}\n", + " P(b'\\ |\\ b, a) &= \\sum_{e} P(b'\\ |\\ b, a, e) P(e\\ |\\ b, a) \\\\\n", + " &= \\sum_{e} P(b'\\ |\\ b, a, e) \\sum_{s'} P(e\\ |\\ s') \\sum_s P(s'\\ |\\ s, a) b(s)\n", + "\\end{align*}\n", + "\n", + "where $P(b'\\ |\\ b, a, e) = 1$ if $b' = $ `FORWARD(b, a, e)` and $= 0$ otherwise.\n", + "\n", + "Given initial and final belief states $b$ and $b'$, the transition probabilities still depend on the action $a$ and observed evidence $e$. Some belief states may be achievable by certain actions, but have non-zero probabilities for states prohibited by the evidence $e$. The above condition thus ensures that only valid combinations of $(b', b, a, e)$ are considered.\n", + "\n", + "#### A modified rewardspace\n", + "\n", + "For MDPs, the reward space was simple - one reward per available state. However, for a belief vector $b(s)$, the expected reward is now:\n", + "$$\\rho(b) = \\sum_s b(s) R(s)$$\n", + "\n", + "Thus, as the belief vector can take infinite values of the distribution over states, so can the reward for each belief vector vary over a hyperplane in the belief space, or space of states (planes in an $N$-dimensional space are formed by a linear combination of the axes)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now that we know the basics, let's have a look at the `POMDP` class." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + " Codestin Search App\n", + " \n", + " \n", + "\n", + "\n", + "

\n", + "\n", + "
class POMDP(MDP):\n",
+       "\n",
+       "    """A Partially Observable Markov Decision Process, defined by\n",
+       "    a transition model P(s'|s,a), actions A(s), a reward function R(s),\n",
+       "    and a sensor model P(e|s). We also keep track of a gamma value,\n",
+       "    for use by algorithms. The transition and the sensor models\n",
+       "    are defined as matrices. We also keep track of the possible states\n",
+       "    and actions for each state. [page 659]."""\n",
+       "\n",
+       "    def __init__(self, actions, transitions=None, evidences=None, rewards=None, states=None, gamma=0.95):\n",
+       "        """Initialize variables of the pomdp"""\n",
+       "\n",
+       "        if not (0 < gamma <= 1):\n",
+       "            raise ValueError('A POMDP must have 0 < gamma <= 1')\n",
+       "\n",
+       "        self.states = states\n",
+       "        self.actions = actions\n",
+       "\n",
+       "        # transition model cannot be undefined\n",
+       "        self.t_prob = transitions or {}\n",
+       "        if not self.t_prob:\n",
+       "            print('Warning: Transition model is undefined')\n",
+       "        \n",
+       "        # sensor model cannot be undefined\n",
+       "        self.e_prob = evidences or {}\n",
+       "        if not self.e_prob:\n",
+       "            print('Warning: Sensor model is undefined')\n",
+       "        \n",
+       "        self.gamma = gamma\n",
+       "        self.rewards = rewards\n",
+       "\n",
+       "    def remove_dominated_plans(self, input_values):\n",
+       "        """\n",
+       "        Remove dominated plans.\n",
+       "        This method finds all the lines contributing to the\n",
+       "        upper surface and removes those which don't.\n",
+       "        """\n",
+       "\n",
+       "        values = [val for action in input_values for val in input_values[action]]\n",
+       "        values.sort(key=lambda x: x[0], reverse=True)\n",
+       "\n",
+       "        best = [values[0]]\n",
+       "        y1_max = max(val[1] for val in values)\n",
+       "        tgt = values[0]\n",
+       "        prev_b = 0\n",
+       "        prev_ix = 0\n",
+       "        while tgt[1] != y1_max:\n",
+       "            min_b = 1\n",
+       "            min_ix = 0\n",
+       "            for i in range(prev_ix + 1, len(values)):\n",
+       "                if values[i][0] - tgt[0] + tgt[1] - values[i][1] != 0:\n",
+       "                    trans_b = (values[i][0] - tgt[0]) / (values[i][0] - tgt[0] + tgt[1] - values[i][1])\n",
+       "                    if 0 <= trans_b <= 1 and trans_b > prev_b and trans_b < min_b:\n",
+       "                        min_b = trans_b\n",
+       "                        min_ix = i\n",
+       "            prev_b = min_b\n",
+       "            prev_ix = min_ix\n",
+       "            tgt = values[min_ix]\n",
+       "            best.append(tgt)\n",
+       "\n",
+       "        return self.generate_mapping(best, input_values)\n",
+       "\n",
+       "    def remove_dominated_plans_fast(self, input_values):\n",
+       "        """\n",
+       "        Remove dominated plans using approximations.\n",
+       "        Resamples the upper boundary at intervals of 100 and\n",
+       "        finds the maximum values at these points.\n",
+       "        """\n",
+       "\n",
+       "        values = [val for action in input_values for val in input_values[action]]\n",
+       "        values.sort(key=lambda x: x[0], reverse=True)\n",
+       "\n",
+       "        best = []\n",
+       "        sr = 100\n",
+       "        for i in range(sr + 1):\n",
+       "            x = i / float(sr)\n",
+       "            maximum = (values[0][1] - values[0][0]) * x + values[0][0]\n",
+       "            tgt = values[0]\n",
+       "            for value in values:\n",
+       "                val = (value[1] - value[0]) * x + value[0]\n",
+       "                if val > maximum:\n",
+       "                    maximum = val\n",
+       "                    tgt = value\n",
+       "\n",
+       "            if all(any(tgt != v) for v in best):\n",
+       "                best.append(tgt)\n",
+       "\n",
+       "        return self.generate_mapping(best, input_values)\n",
+       "\n",
+       "    def generate_mapping(self, best, input_values):\n",
+       "        """Generate mappings after removing dominated plans"""\n",
+       "\n",
+       "        mapping = defaultdict(list)\n",
+       "        for value in best:\n",
+       "            for action in input_values:\n",
+       "                if any(all(value == v) for v in input_values[action]):\n",
+       "                    mapping[action].append(value)\n",
+       "\n",
+       "        return mapping\n",
+       "\n",
+       "    def max_difference(self, U1, U2):\n",
+       "        """Find maximum difference between two utility mappings"""\n",
+       "\n",
+       "        for k, v in U1.items():\n",
+       "            sum1 = 0\n",
+       "            for element in U1[k]:\n",
+       "                sum1 += sum(element)\n",
+       "            sum2 = 0\n",
+       "            for element in U2[k]:\n",
+       "                sum2 += sum(element)\n",
+       "        return abs(sum1 - sum2)\n",
+       "
\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "psource(POMDP)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `POMDP` class includes all variables of the `MDP` class and additionally also stores the sensor model in `e_prob`.\n", + "
\n", + "
\n", + "`remove_dominated_plans`, `remove_dominated_plans_fast`, `generate_mapping` and `max_difference` are helper methods for `pomdp_value_iteration` which will be explained shortly." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To understand how we can model a partially observable MDP, let's take a simple example.\n", + "Let's consider a simple two state world.\n", + "The states are labelled 0 and 1, with the reward at state 0 being 0 and at state 1 being 1.\n", + "
\n", + "There are two actions:\n", + "
\n", + "`Stay`: stays put with probability 0.9 and\n", + "`Go`: switches to the other state with probability 0.9.\n", + "
\n", + "For now, let's assume the discount factor `gamma` to be 1.\n", + "
\n", + "The sensor reports the correct state with probability 0.6.\n", + "
\n", + "This is a simple problem with a trivial solution.\n", + "Obviously the agent should `Stay` when it thinks it is in state 1 and `Go` when it thinks it is in state 0.\n", + "
\n", + "The belief space can be viewed as one-dimensional because the two probabilities must sum to 1." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's model this POMDP using the `POMDP` class." + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [], + "source": [ + "# transition probability P(s'|s,a)\n", + "t_prob = [[[0.9, 0.1], [0.1, 0.9]], [[0.1, 0.9], [0.9, 0.1]]]\n", + "# evidence function P(e|s)\n", + "e_prob = [[[0.6, 0.4], [0.4, 0.6]], [[0.6, 0.4], [0.4, 0.6]]]\n", + "# reward function\n", + "rewards = [[0.0, 0.0], [1.0, 1.0]]\n", + "# discount factor\n", + "gamma = 0.95\n", + "# actions\n", + "actions = ('0', '1')\n", + "# states\n", + "states = ('0', '1')" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [], + "source": [ + "pomdp = POMDP(actions, t_prob, e_prob, rewards, states, gamma)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We have defined our `POMDP` object." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## POMDP VALUE ITERATION\n", + "Defining a POMDP is useless unless we can find a way to solve it. As POMDPs can have infinitely many belief states, we cannot calculate one utility value for each state as we did in `value_iteration` for MDPs.\n", + "
\n", + "Instead of thinking about policies, we should think about conditional plans and how the expected utility of executing a fixed conditional plan varies with the initial belief state.\n", + "
\n", + "If we bound the depth of the conditional plans, then there are only finitely many such plans and the continuous space of belief states will generally be divided into _regions_, each corresponding to a particular conditional plan that is optimal in that region. The utility function, being the maximum of a collection of hyperplanes, will be piecewise linear and convex.\n", + "
\n", + "For the one-step plans `Stay` and `Go`, the utility values are as follows\n", + "
\n", + "
\n", + "$$\\alpha_{|Stay|}(0) = R(0) + \\gamma(0.9R(0) + 0.1R(1)) = 0.1$$\n", + "$$\\alpha_{|Stay|}(1) = R(1) + \\gamma(0.9R(1) + 0.1R(0)) = 1.9$$\n", + "$$\\alpha_{|Go|}(0) = R(0) + \\gamma(0.9R(1) + 0.1R(0)) = 0.9$$\n", + "$$\\alpha_{|Go|}(1) = R(1) + \\gamma(0.9R(0) + 0.1R(1)) = 1.1$$" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The utility function can be found by `pomdp_value_iteration`.\n", + "
\n", + "To summarize, it generates a set of all plans consisting of an action and, for each possible next percept, a plan in U with computed utility vectors.\n", + "The dominated plans are then removed from this set and the process is repeated till the maximum difference between the utility functions of two consecutive iterations reaches a value less than a threshold value." + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "### AIMA3e\n", + "__function__ POMDP-VALUE-ITERATION(_pomdp_, _ε_) __returns__ a utility function \n", + " __inputs__: _pomdp_, a POMDP with states _S_, actions _A_(_s_), transition model _P_(_s′_ | _s_, _a_), \n", + "      sensor model _P_(_e_ | _s_), rewards _R_(_s_), discount _γ_ \n", + "     _ε_, the maximum error allowed in the utility of any state \n", + " __local variables__: _U_, _U′_, sets of plans _p_ with associated utility vectors _αp_ \n", + "\n", + " _U′_ ← a set containing just the empty plan \\[\\], with _α\\[\\]_(_s_) = _R_(_s_) \n", + " __repeat__ \n", + "   _U_ ← _U′_ \n", + "   _U′_ ← the set of all plans consisting of an action and, for each possible next percept, \n", + "     a plan in _U_ with utility vectors computed according to Equation(__??__) \n", + "   _U′_ ← REMOVE\\-DOMINATED\\-PLANS(_U′_) \n", + " __until__ MAX\\-DIFFERENCE(_U_, _U′_) < _ε_(1 − _γ_) ⁄ _γ_ \n", + " __return__ _U_ \n", + "\n", + "---\n", + "__Figure ??__ A high\\-level sketch of the value iteration algorithm for POMDPs. The REMOVE\\-DOMINATED\\-PLANS step and MAX\\-DIFFERENCE test are typically implemented as linear programs." + ], + "text/plain": [ + "" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pseudocode('POMDP-Value-Iteration')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's have a look at the `pomdp_value_iteration` function." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + " Codestin Search App\n", + " \n", + " \n", + "\n", + "\n", + "

\n", + "\n", + "
def pomdp_value_iteration(pomdp, epsilon=0.1):\n",
+       "    """Solving a POMDP by value iteration."""\n",
+       "\n",
+       "    U = {'':[[0]* len(pomdp.states)]}\n",
+       "    count = 0\n",
+       "    while True:\n",
+       "        count += 1\n",
+       "        prev_U = U\n",
+       "        values = [val for action in U for val in U[action]]\n",
+       "        value_matxs = []\n",
+       "        for i in values:\n",
+       "            for j in values:\n",
+       "                value_matxs.append([i, j])\n",
+       "\n",
+       "        U1 = defaultdict(list)\n",
+       "        for action in pomdp.actions:\n",
+       "            for u in value_matxs:\n",
+       "                u1 = Matrix.matmul(Matrix.matmul(pomdp.t_prob[int(action)], Matrix.multiply(pomdp.e_prob[int(action)], Matrix.transpose(u))), [[1], [1]])\n",
+       "                u1 = Matrix.add(Matrix.scalar_multiply(pomdp.gamma, Matrix.transpose(u1)), [pomdp.rewards[int(action)]])\n",
+       "                U1[action].append(u1[0])\n",
+       "\n",
+       "        U = pomdp.remove_dominated_plans_fast(U1)\n",
+       "        # replace with U = pomdp.remove_dominated_plans(U1) for accurate calculations\n",
+       "        \n",
+       "        if count > 10:\n",
+       "            if pomdp.max_difference(U, prev_U) < epsilon * (1 - pomdp.gamma) / pomdp.gamma:\n",
+       "                return U\n",
+       "
\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "psource(pomdp_value_iteration)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This function uses two aptly named helper methods from the `POMDP` class, `remove_dominated_plans` and `max_difference`." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's try solving a simple one-dimensional POMDP using value-iteration.\n", + "
\n", + "Consider the problem of a user listening to voicemails.\n", + "At the end of each message, they can either _save_ or _delete_ a message.\n", + "This forms the unobservable state _S = {save, delete}_.\n", + "It is the task of the POMDP solver to guess which goal the user has.\n", + "
\n", + "The belief space has two elements, _b(s = save)_ and _b(s = delete)_.\n", + "For example, for the belief state _b = (1, 0)_, the left end of the line segment indicates _b(s = save) = 1_ and _b(s = delete) = 0_.\n", + "The intermediate points represent varying degrees of certainty in the user's goal.\n", + "
\n", + "The machine has three available actions: it can _ask_ what the user wishes to do in order to infer his or her current goal, or it can _doSave_ or _doDelete_ and move to the next message.\n", + "If the user says _save_, then an error may occur with probability 0.2, whereas if the user says _delete_, an error may occur with probability 0.3.\n", + "
\n", + "The machine receives a large positive reward (+5) for getting the user's goal correct, a very large negative reward (-20) for taking the action _doDelete_ when the user wanted _save_, and a smaller but still significant negative reward (-10) for taking the action _doSave_ when the user wanted _delete_. \n", + "There is also a small negative reward for taking the _ask_ action (-1).\n", + "The discount factor is set to 0.95 for this example.\n", + "
\n", + "Let's define the POMDP." + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [], + "source": [ + "# transition function P(s'|s,a)\n", + "t_prob = [[[0.65, 0.35], [0.65, 0.35]], [[0.65, 0.35], [0.65, 0.35]], [[1.0, 0.0], [0.0, 1.0]]]\n", + "# evidence function P(e|s)\n", + "e_prob = [[[0.5, 0.5], [0.5, 0.5]], [[0.5, 0.5], [0.5, 0.5]], [[0.8, 0.2], [0.3, 0.7]]]\n", + "# reward function\n", + "rewards = [[5, -10], [-20, 5], [-1, -1]]\n", + "\n", + "gamma = 0.95\n", + "actions = ('0', '1', '2')\n", + "states = ('0', '1')\n", + "\n", + "pomdp = POMDP(actions, t_prob, e_prob, rewards, states, gamma)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We have defined the `POMDP` object.\n", + "Let's run `pomdp_value_iteration` to find the utility function." + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [], + "source": [ + "utility = pomdp_value_iteration(pomdp, epsilon=0.1)" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "%matplotlib inline\n", + "plot_pomdp_utility(utility)" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -2221,7 +2988,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.1" + "version": "3.6.4" }, "widgets": { "state": { @@ -4714,4 +5481,3 @@ "nbformat": 4, "nbformat_minor": 1 } - diff --git a/mdp.py b/mdp.py index b9a6eaea0..657334d59 100644 --- a/mdp.py +++ b/mdp.py @@ -9,6 +9,8 @@ from utils import argmax, vector_add, orientations, turn_right, turn_left import random +import numpy as np +from collections import defaultdict class MDP: @@ -51,11 +53,13 @@ def __init__(self, init, actlist, terminals, transitions=None, reward=None, stat def R(self, state): """Return a numeric reward for this 
state.""" + return self.reward[state] def T(self, state, action): """Transition model. From a state and an action, return a list of (probability, result-state) pairs.""" + if not self.transitions: raise ValueError("Transition model is missing") else: @@ -65,6 +69,7 @@ def actions(self, state): """Return a list of actions that can be performed in this state. By default, a fixed list of actions, except for terminal states. Override this method if you need to specialize by state.""" + if state in self.terminals: return [None] else: @@ -106,7 +111,10 @@ def check_consistency(self): class MDP2(MDP): - """Inherits from MDP. Handles terminal states, and transitions to and from terminal states better.""" + """ + Inherits from MDP. Handles terminal states, and transitions to and from terminal states better. + """ + def __init__(self, init, actlist, terminals, transitions, reward=None, gamma=0.9): MDP.__init__(self, init, actlist, terminals, transitions, reward, gamma=gamma) @@ -160,11 +168,13 @@ def T(self, state, action): def go(self, state, direction): """Return the state that results from going in this direction.""" + state1 = vector_add(state, direction) return state1 if state1 in self.states else state def to_grid(self, mapping): """Convert a mapping from (x, y) to v into a [[..., v, ...]] grid.""" + return list(reversed([[mapping.get((x, y), None) for x in range(self.cols)] for y in range(self.rows)])) @@ -190,6 +200,7 @@ def to_arrows(self, policy): def value_iteration(mdp, epsilon=0.001): """Solving an MDP by value iteration. [Figure 17.4]""" + U1 = {s: 0 for s in mdp.states} R, T, gamma = mdp.R, mdp.T, mdp.gamma while True: @@ -206,6 +217,7 @@ def value_iteration(mdp, epsilon=0.001): def best_policy(mdp, U): """Given an MDP and a utility function U, determine the best policy, as a mapping from state to action. 
(Equation 17.4)""" + pi = {} for s in mdp.states: pi[s] = argmax(mdp.actions(s), key=lambda a: expected_utility(a, s, U, mdp)) @@ -214,6 +226,7 @@ def best_policy(mdp, U): def expected_utility(a, s, U, mdp): """The expected utility of doing a in state s, according to the MDP and U.""" + return sum(p*U[s1] for (p, s1) in mdp.T(s, a)) # ______________________________________________________________________________ @@ -221,6 +234,7 @@ def expected_utility(a, s, U, mdp): def policy_iteration(mdp): """Solve an MDP by policy iteration [Figure 17.7]""" + U = {s: 0 for s in mdp.states} pi = {s: random.choice(mdp.actions(s)) for s in mdp.states} while True: @@ -238,6 +252,7 @@ def policy_iteration(mdp): def policy_evaluation(pi, U, mdp, k=20): """Return an updated utility mapping U from each state in the MDP to its utility, using an approximation (modified policy iteration).""" + R, T, gamma = mdp.R, mdp.T, mdp.gamma for i in range(k): for s in mdp.states: @@ -245,6 +260,198 @@ def policy_evaluation(pi, U, mdp, k=20): return U +class POMDP(MDP): + + """A Partially Observable Markov Decision Process, defined by + a transition model P(s'|s,a), actions A(s), a reward function R(s), + and a sensor model P(e|s). We also keep track of a gamma value, + for use by algorithms. The transition and the sensor models + are defined as matrices. We also keep track of the possible states + and actions for each state. 
[page 659].""" + + def __init__(self, actions, transitions=None, evidences=None, rewards=None, states=None, gamma=0.95): + """Initialize variables of the pomdp""" + + if not (0 < gamma <= 1): + raise ValueError('A POMDP must have 0 < gamma <= 1') + + self.states = states + self.actions = actions + + # transition model cannot be undefined + self.t_prob = transitions or {} + if not self.t_prob: + print('Warning: Transition model is undefined') + + # sensor model cannot be undefined + self.e_prob = evidences or {} + if not self.e_prob: + print('Warning: Sensor model is undefined') + + self.gamma = gamma + self.rewards = rewards + + def remove_dominated_plans(self, input_values): + """ + Remove dominated plans. + This method finds all the lines contributing to the + upper surface and removes those which don't. + """ + + values = [val for action in input_values for val in input_values[action]] + values.sort(key=lambda x: x[0], reverse=True) + + best = [values[0]] + y1_max = max(val[1] for val in values) + tgt = values[0] + prev_b = 0 + prev_ix = 0 + while tgt[1] != y1_max: + min_b = 1 + min_ix = 0 + for i in range(prev_ix + 1, len(values)): + if values[i][0] - tgt[0] + tgt[1] - values[i][1] != 0: + trans_b = (values[i][0] - tgt[0]) / (values[i][0] - tgt[0] + tgt[1] - values[i][1]) + if 0 <= trans_b <= 1 and trans_b > prev_b and trans_b < min_b: + min_b = trans_b + min_ix = i + prev_b = min_b + prev_ix = min_ix + tgt = values[min_ix] + best.append(tgt) + + return self.generate_mapping(best, input_values) + + def remove_dominated_plans_fast(self, input_values): + """ + Remove dominated plans using approximations. + Resamples the upper boundary at intervals of 100 and + finds the maximum values at these points. 
+ """ + + values = [val for action in input_values for val in input_values[action]] + values.sort(key=lambda x: x[0], reverse=True) + + best = [] + sr = 100 + for i in range(sr + 1): + x = i / float(sr) + maximum = (values[0][1] - values[0][0]) * x + values[0][0] + tgt = values[0] + for value in values: + val = (value[1] - value[0]) * x + value[0] + if val > maximum: + maximum = val + tgt = value + + if all(any(tgt != v) for v in best): + best.append(np.array(tgt)) + + return self.generate_mapping(best, input_values) + + def generate_mapping(self, best, input_values): + """Generate mappings after removing dominated plans""" + + mapping = defaultdict(list) + for value in best: + for action in input_values: + if any(all(value == v) for v in input_values[action]): + mapping[action].append(value) + + return mapping + + def max_difference(self, U1, U2): + """Find maximum difference between two utility mappings""" + + for k, v in U1.items(): + sum1 = 0 + for element in U1[k]: + sum1 += sum(element) + sum2 = 0 + for element in U2[k]: + sum2 += sum(element) + return abs(sum1 - sum2) + + +class Matrix: + """Matrix operations class""" + + @staticmethod + def add(A, B): + """Add two matrices A and B""" + + res = [] + for i in range(len(A)): + row = [] + for j in range(len(A[0])): + row.append(A[i][j] + B[i][j]) + res.append(row) + return res + + @staticmethod + def scalar_multiply(a, B): + """Multiply scalar a to matrix B""" + + for i in range(len(B)): + for j in range(len(B[0])): + B[i][j] = a * B[i][j] + return B + + @staticmethod + def multiply(A, B): + """Multiply two matrices A and B element-wise""" + + matrix = [] + for i in range(len(B)): + row = [] + for j in range(len(B[0])): + row.append(B[i][j] * A[j][i]) + matrix.append(row) + + return matrix + + @staticmethod + def matmul(A, B): + """Inner-product of two matrices""" + + return [[sum(ele_a*ele_b for ele_a, ele_b in zip(row_a, col_b)) for col_b in list(zip(*B))] for row_a in A] + + @staticmethod + def transpose(A): 
+ """Transpose a matrix""" + + return [list(i) for i in zip(*A)] + + +def pomdp_value_iteration(pomdp, epsilon=0.1): + """Solving a POMDP by value iteration.""" + + U = {'':[[0]* len(pomdp.states)]} + count = 0 + while True: + count += 1 + prev_U = U + values = [val for action in U for val in U[action]] + value_matxs = [] + for i in values: + for j in values: + value_matxs.append([i, j]) + + U1 = defaultdict(list) + for action in pomdp.actions: + for u in value_matxs: + u1 = Matrix.matmul(Matrix.matmul(pomdp.t_prob[int(action)], Matrix.multiply(pomdp.e_prob[int(action)], Matrix.transpose(u))), [[1], [1]]) + u1 = Matrix.add(Matrix.scalar_multiply(pomdp.gamma, Matrix.transpose(u1)), [pomdp.rewards[int(action)]]) + U1[action].append(u1[0]) + + U = pomdp.remove_dominated_plans_fast(U1) + # replace with U = pomdp.remove_dominated_plans(U1) for accurate calculations + + if count > 10: + if pomdp.max_difference(U, prev_U) < epsilon * (1 - pomdp.gamma) / pomdp.gamma: + return U + + __doc__ += """ >>> pi = best_policy(sequential_decision_environment, value_iteration(sequential_decision_environment, .01)) diff --git a/mdp_apps.ipynb b/mdp_apps.ipynb index 50dce5427..da3ae7b06 100644 --- a/mdp_apps.ipynb +++ b/mdp_apps.ipynb @@ -7,15 +7,13 @@ "# APPLICATIONS OF MARKOV DECISION PROCESSES\n", "---\n", "In this notebook we will take a look at some indicative applications of markov decision processes. \n", - "We will cover content from [`mdp.py`](https://github.com/aimacode/aima-python/blob/master/mdp.py), for chapter 17 of Stuart Russel's and Peter Norvig's book [*Artificial Intelligence: A Modern Approach*](http://aima.cs.berkeley.edu/)." 
+ "We will cover content from [`mdp.py`](https://github.com/aimacode/aima-python/blob/master/mdp.py), for **Chapter 17 Making Complex Decisions** of Stuart Russell's and Peter Norvig's book [*Artificial Intelligence: A Modern Approach*](http://aima.cs.berkeley.edu/).\n" ] }, { "cell_type": "code", "execution_count": 1, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "from mdp import *\n", @@ -33,7 +31,14 @@ " - State, action and next state dependent reward function\n", "- Grid MDP\n", " - Pathfinding problem\n", - "\n", + "- POMDP\n", + " - Two state POMDP" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ "## SIMPLE MDP\n", "---\n", "### State dependent reward function\n", @@ -1429,6 +1434,371 @@ "As you can infer, we can find the path to the terminal state starting from any given state using this policy.\n", "All maze problems can be solved by formulating it as a MDP." ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## POMDP\n", + "### Two state POMDP\n", + "Let's consider a problem where we have two doors, one to our left and one to our right.\n", + "One of these doors opens to a room with a tiger in it, and the other one opens to an empty hall.\n", + "
\n", + "We will call our two states `0` and `1` for `left` and `right` respectively.\n", + "
\n", + "The possible actions we can take are as follows:\n", + "
\n", + "1. __Open-left__: Open the left door.\n", + "Represented by `0`.\n", + "2. __Open-right__: Open the right door.\n", + "Represented by `1`.\n", + "3. __Listen__: Listen carefully to one side and possibly hear the tiger breathing.\n", + "Represented by `2`.\n", + "\n", + "
\n", + "The possible observations we can get are as follows:\n", + "
\n", + "1. __TL__: Tiger seems to be at the left door.\n", + "2. __TR__: Tiger seems to be at the right door.\n", + "\n", + "
\n", + "The reward function is as follows:\n", + "
\n", + "We get +10 reward for opening the door to the empty hall and we get -100 reward for opening the other door and setting the tiger free.\n", + "
\n", + "Listening costs us -1 reward.\n", + "
\n", + "We want to minimize our chances of setting the tiger free.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Our transition probabilities can be defined as:\n", + "
\n", + "
\n", + "Action `0` (Open left door)\n", + "$\\\\\n", + " P(0) = \n", + " \\left[ {\\begin{array}{cc}\n", + " 0.5 & 0.5 \\\\\n", + " 0.5 & 0.5 \\\\\n", + " \\end{array}}\\right] \\\\\n", + " \\\\\n", + " $\n", + " \n", + "Action `1` (Open right door)\n", + "$\\\\\n", + " P(1) = \n", + " \\left[ {\\begin{array}{cc}\n", + " 0.5 & 0.5 \\\\\n", + " 0.5 & 0.5 \\\\\n", + " \\end{array}}\\right] \\\\\n", + " \\\\\n", + " $\n", + " \n", + "Action `2` (Listen)\n", + "$\\\\\n", + " P(2) = \n", + " \\left[ {\\begin{array}{cc}\n", + " 1.0 & 0.0 \\\\\n", + " 0.0 & 1.0 \\\\\n", + " \\end{array}}\\right] \\\\\n", + " \\\\\n", + " $\n", + " \n", + "
\n", + "
\n", + "Our observation probabilities can be defined as:\n", + "
\n", + "
\n", + "$\\\\\n", + " O(0) = \n", + " \\left[ {\\begin{array}{ccc}\n", + " \\text{Open left} & \\text{TL} & \\text{TR} \\\\\n", + " \\text{Tiger: left} & 0.5 & 0.5 \\\\\n", + " \\text{Tiger: right} & 0.5 & 0.5 \\\\\n", + " \\end{array}}\\right] \\\\\n", + " \\\\\n", + " $\n", + "\n", + "$\\\\\n", + " O(1) = \n", + " \\left[ {\\begin{array}{ccc}\n", + " \\text{Open right} & \\text{TL} & \\text{TR} \\\\\n", + " \\text{Tiger: left} & 0.5 & 0.5 \\\\\n", + " \\text{Tiger: right} & 0.5 & 0.5 \\\\\n", + " \\end{array}}\\right] \\\\\n", + " \\\\\n", + " $\n", + "\n", + "$\\\\\n", + " O(2) = \n", + " \\left[ {\\begin{array}{ccc}\n", + " \\text{Listen} & \\text{TL} & \\text{TR} \\\\\n", + " \\text{Tiger: left} & 0.85 & 0.15 \\\\\n", + " \\text{Tiger: right} & 0.15 & 0.85 \\\\\n", + " \\end{array}}\\right] \\\\\n", + " \\\\\n", + " $\n", + "\n", + "
\n", + "
\n", + "The rewards of this POMDP are defined as:\n", + "
\n", + "
\n", + "$\\\\\n", + " R(0) = \n", + " \\left[ {\\begin{array}{cc}\n", + " \\text{Open left} & \\text{Reward} \\\\\n", + " \\text{Tiger: left} & -100 \\\\\n", + " \\text{Tiger: right} & +10 \\\\\n", + " \\end{array}}\\right] \\\\\n", + " \\\\\n", + " $\n", + " \n", + "$\\\\\n", + " R(1) = \n", + " \\left[ {\\begin{array}{cc}\n", + " \\text{Open right} & \\text{Reward} \\\\\n", + " \\text{Tiger: left} & +10 \\\\\n", + " \\text{Tiger: right} & -100 \\\\\n", + " \\end{array}}\\right] \\\\\n", + " \\\\\n", + " $\n", + " \n", + "$\\\\\n", + " R(2) = \n", + " \\left[ {\\begin{array}{cc}\n", + " \\text{Listen} & \\text{Reward} \\\\\n", + " \\text{Tiger: left} & -1 \\\\\n", + " \\text{Tiger: right} & -1 \\\\\n", + " \\end{array}}\\right] \\\\\n", + " \\\\\n", + " $\n", + " \n", + "
\n", + "Based on these matrices, we will initialize our variables." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's first define our transition model." + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [], + "source": [ + "t_prob = [[[0.5, 0.5], \n", + " [0.5, 0.5]], \n", + " \n", + " [[0.5, 0.5], \n", + " [0.5, 0.5]], \n", + " \n", + " [[1.0, 0.0], \n", + " [0.0, 1.0]]]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Followed by the observation model." + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [], + "source": [ + "e_prob = [[[0.5, 0.5], \n", + " [0.5, 0.5]], \n", + " \n", + " [[0.5, 0.5], \n", + " [0.5, 0.5]], \n", + " \n", + " [[0.85, 0.15], \n", + " [0.15, 0.85]]]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And the reward model." + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [], + "source": [ + "rewards = [[-100, 10], \n", + " [10, -100], \n", + " [-1, -1]]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's now define our states and actions.\n", + "
\n", + "We will use `gamma` = 0.95 for this example.\n", + "
" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [], + "source": [ + "# 0: open-left, 1: open-right, 2: listen\n", + "actions = ('0', '1', '2')\n", + "# 0: left, 1: right\n", + "states = ('0', '1')\n", + "\n", + "gamma = 0.95" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We have all the required variables to instantiate an object of the `POMDP` class." + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [], + "source": [ + "pomdp = POMDP(actions, t_prob, e_prob, rewards, states, gamma)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can now find the utility function by running `pomdp_value_iteration` on our `pomdp` object." + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "defaultdict(list,\n", + " {'0': [array([-83.05169196, 26.94830804])],\n", + " '1': [array([ 26.94830804, -83.05169196])],\n", + " '2': [array([23.55049363, -0.76359097]),\n", + " array([23.55049363, -0.76359097]),\n", + " array([23.55049363, -0.76359097]),\n", + " array([23.55049363, -0.76359097]),\n", + " array([23.24120177, 1.56028929]),\n", + " array([23.24120177, 1.56028929]),\n", + " array([23.24120177, 1.56028929]),\n", + " array([20.0874279 , 15.03900771]),\n", + " array([20.0874279 , 15.03900771]),\n", + " array([20.0874279 , 15.03900771]),\n", + " array([20.0874279 , 15.03900771]),\n", + " array([17.91696135, 17.91696135]),\n", + " array([17.91696135, 17.91696135]),\n", + " array([17.91696135, 17.91696135]),\n", + " array([17.91696135, 17.91696135]),\n", + " array([17.91696135, 17.91696135]),\n", + " array([15.03900771, 20.0874279 ]),\n", + " array([15.03900771, 20.0874279 ]),\n", + " array([15.03900771, 20.0874279 ]),\n", + " array([15.03900771, 20.0874279 ]),\n", + " array([ 1.56028929, 23.24120177]),\n", + " array([ 1.56028929, 23.24120177]),\n", + " array([ 
1.56028929, 23.24120177]),\n", + " array([-0.76359097, 23.55049363]),\n", + " array([-0.76359097, 23.55049363]),\n", + " array([-0.76359097, 23.55049363]),\n", + " array([-0.76359097, 23.55049363])]})" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "utility = pomdp_value_iteration(pomdp, epsilon=3)\n", + "utility" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "%matplotlib inline\n", + "\n", + "def plot_utility(utility):\n", + " open_left = utility['0'][0]\n", + " open_right = utility['1'][0]\n", + " listen_left = utility['2'][0]\n", + " listen_right = utility['2'][-1]\n", + " left = (open_left[0] - listen_left[0]) / (open_left[0] - listen_left[0] + listen_left[1] - open_left[1])\n", + " right = (open_right[0] - listen_right[0]) / (open_right[0] - listen_right[0] + listen_right[1] - open_right[1])\n", + " \n", + " colors = ['g', 'b', 'k']\n", + " for action in utility:\n", + " for value in utility[action]:\n", + " plt.plot(value, color=colors[int(action)])\n", + " plt.vlines([left, right], -10, 35, linestyles='dashed', colors='c')\n", + " plt.ylim(-10, 35)\n", + " plt.xlim(0, 1)\n", + " plt.text(left/2 - 0.35, 30, 'open-left')\n", + " plt.text((right + left)/2 - 0.04, 30, 'listen')\n", + " plt.text((right + 1)/2 + 0.22, 30, 'open-right')\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plot_utility(utility)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Hence, we get a piecewise-linear utility function consistent with the given POMDP." 
+ ] } ], "metadata": { @@ -1447,7 +1817,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.1" + "version": "3.6.4" } }, "nbformat": 4, diff --git a/notebook.py b/notebook.py index 263f7a44b..80062d9f6 100644 --- a/notebook.py +++ b/notebook.py @@ -1087,3 +1087,24 @@ def gaussian_kernel(l=5, sig=1.0): xx, yy = np.meshgrid(ax, ax) kernel = np.exp(-(xx**2 + yy**2) / (2. * sig**2)) return kernel + +# Plots utility function for a POMDP +def plot_pomdp_utility(utility): + save = utility['0'][0] + delete = utility['1'][0] + ask_save = utility['2'][0] + ask_delete = utility['2'][-1] + left = (save[0] - ask_save[0]) / (save[0] - ask_save[0] + ask_save[1] - save[1]) + right = (delete[0] - ask_delete[0]) / (delete[0] - ask_delete[0] + ask_delete[1] - delete[1]) + + colors = ['g', 'b', 'k'] + for action in utility: + for value in utility[action]: + plt.plot(value, color=colors[int(action)]) + plt.vlines([left, right], -20, 10, linestyles='dashed', colors='c') + plt.ylim(-20, 13) + plt.xlim(0, 1) + plt.text(left/2 - 0.05, 10, 'Save') + plt.text((right + left)/2 - 0.02, 10, 'Ask') + plt.text((right + 1)/2 - 0.07, 10, 'Delete') + plt.show() diff --git a/planning.ipynb b/planning.ipynb index fd21a6e88..ca54bcde2 100644 --- a/planning.ipynb +++ b/planning.ipynb @@ -19,7 +19,7 @@ "This notebook uses implementations from the [planning.py](https://github.com/aimacode/aima-python/blob/master/planning.py) module. \n", "See the [intro notebook](https://github.com/aimacode/aima-python/blob/master/intro.ipynb) for instructions.\n", "\n", - "We'll start by looking at `PDDL` and `Action` data types for defining problems and actions. \n", + "We'll start by looking at `PlanningProblem` and `Action` data types for defining problems and actions. 
\n", "Then, we will see how to use them by trying to plan a trip from *Sibiu* to *Bucharest* across the familiar map of Romania, from [search.ipynb](https://github.com/aimacode/aima-python/blob/master/search.ipynb) \n", "followed by some common planning problems and methods of solving them.\n", "\n", @@ -44,26 +44,41 @@ "source": [ "## CONTENTS\n", "\n", - "- PDDL\n", + "**Classical Planning**\n", + "- PlanningProblem\n", "- Action\n", "- Planning Problems\n", " * Air cargo problem\n", " * Spare tire problem\n", " * Three block tower problem\n", " * Shopping Problem\n", + " * Socks and shoes problem\n", " * Cake problem\n", "- Solving Planning Problems\n", - " * GraphPlan" + " * GraphPlan\n", + " * Linearize\n", + " * PartialOrderPlanner\n", + "
\n", + "\n", + "**Planning in the real world**\n", + "- Problem\n", + "- HLA\n", + "- Planning Problems\n", + " * Job shop problem\n", + " * Double tennis problem\n", + "- Solving Planning Problems\n", + " * Hierarchical Search\n", + " * Angelic Search" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## PDDL\n", + "## PlanningProblem\n", "\n", "PDDL stands for Planning Domain Definition Language.\n", - "The `PDDL` class is used to represent planning problems in this module. The following attributes are essential to be able to define a problem:\n", + "The `PlanningProblem` class is used to represent planning problems in this module. The following attributes are essential to be able to define a problem:\n", "* an initial state\n", "* a set of goals\n", "* a set of viable actions that can be executed in the search space of the problem\n", @@ -165,29 +180,41 @@ "\n", "

\n", "\n", - "
class PDDL:\n",
+       "
class PlanningProblem:\n",
        "    """\n",
-       "    Planning Domain Definition Language (PDDL) used to define a search problem.\n",
+       "    Planning Domain Definition Language (PlanningProblem) used to define a search problem.\n",
        "    It stores states in a knowledge base consisting of first order logic statements.\n",
        "    The conjunction of these logical statements completely defines a state.\n",
        "    """\n",
        "\n",
        "    def __init__(self, init, goals, actions):\n",
        "        self.init = self.convert(init)\n",
-       "        self.goals = expr(goals)\n",
+       "        self.goals = self.convert(goals)\n",
        "        self.actions = actions\n",
        "\n",
-       "    def convert(self, init):\n",
+       "    def convert(self, clauses):\n",
        "        """Converts strings into exprs"""\n",
+       "        if not isinstance(clauses, Expr):\n",
+       "            if len(clauses) > 0:\n",
+       "                clauses = expr(clauses)\n",
+       "            else:\n",
+       "                clauses = []\n",
        "        try:\n",
-       "            init = conjuncts(expr(init))\n",
+       "            clauses = conjuncts(clauses)\n",
        "        except AttributeError:\n",
-       "            init = expr(init)\n",
-       "        return init\n",
+       "            clauses = clauses\n",
+       "\n",
+       "        new_clauses = []\n",
+       "        for clause in clauses:\n",
+       "            if clause.op == '~':\n",
+       "                new_clauses.append(expr('Not' + str(clause.args[0])))\n",
+       "            else:\n",
+       "                new_clauses.append(clause)\n",
+       "        return new_clauses\n",
        "\n",
        "    def goal_test(self):\n",
        "        """Checks if the goals have been reached"""\n",
-       "        return all(goal in self.init for goal in conjuncts(self.goals))\n",
+       "        return all(goal in self.init for goal in self.goals)\n",
        "\n",
        "    def act(self, action):\n",
        "        """\n",
@@ -215,7 +242,7 @@
     }
    ],
    "source": [
-    "psource(PDDL)"
+    "psource(PlanningProblem)"
    ]
   },
   {
@@ -350,7 +377,7 @@
        "
class Action:\n",
        "    """\n",
        "    Defines an action schema using preconditions and effects.\n",
-       "    Use this to describe actions in PDDL.\n",
+       "    Use this to describe actions in PlanningProblem.\n",
        "    action is an Expr where variables are given as arguments(args).\n",
        "    Precondition and effect are both lists with positive and negative literals.\n",
        "    Negative preconditions and effects are defined by adding a 'Not' before the name of the clause\n",
@@ -361,34 +388,38 @@
        "    """\n",
        "\n",
        "    def __init__(self, action, precond, effect):\n",
-       "        action = expr(action)\n",
+       "        if isinstance(action, str):\n",
+       "            action = expr(action)\n",
        "        self.name = action.op\n",
        "        self.args = action.args\n",
-       "        self.precond, self.effect = self.convert(precond, effect)\n",
+       "        self.precond = self.convert(precond)\n",
+       "        self.effect = self.convert(effect)\n",
        "\n",
        "    def __call__(self, kb, args):\n",
        "        return self.act(kb, args)\n",
        "\n",
-       "    def convert(self, precond, effect):\n",
+       "    def __repr__(self):\n",
+       "        return '{}({})'.format(self.__class__.__name__, Expr(self.name, *self.args))\n",
+       "\n",
+       "    def convert(self, clauses):\n",
        "        """Converts strings into Exprs"""\n",
+       "        if isinstance(clauses, Expr):\n",
+       "            clauses = conjuncts(clauses)\n",
+       "            for i in range(len(clauses)):\n",
+       "                if clauses[i].op == '~':\n",
+       "                    clauses[i] = expr('Not' + str(clauses[i].args[0]))\n",
        "\n",
-       "        precond = precond.replace('~', 'Not')\n",
-       "        if len(precond) > 0:\n",
-       "            precond = expr(precond)\n",
-       "        effect = effect.replace('~', 'Not')\n",
-       "        if len(effect) > 0:\n",
-       "            effect = expr(effect)\n",
+       "        elif isinstance(clauses, str):\n",
+       "            clauses = clauses.replace('~', 'Not')\n",
+       "            if len(clauses) > 0:\n",
+       "                clauses = expr(clauses)\n",
        "\n",
-       "        try:\n",
-       "            precond = conjuncts(precond)\n",
-       "        except AttributeError:\n",
-       "            pass\n",
-       "        try:\n",
-       "            effect = conjuncts(effect)\n",
-       "        except AttributeError:\n",
-       "            pass\n",
+       "            try:\n",
+       "                clauses = conjuncts(clauses)\n",
+       "            except AttributeError:\n",
+       "                pass\n",
        "\n",
-       "        return precond, effect\n",
+       "        return clauses\n",
        "\n",
        "    def substitute(self, e, args):\n",
        "        """Replaces variables in expression with their respective Propositional symbol"""\n",
@@ -405,7 +436,6 @@
        "\n",
        "        if isinstance(kb, list):\n",
        "            kb = FolKB(kb)\n",
-       "\n",
        "        for clause in self.precond:\n",
        "            if self.substitute(clause, args) not in kb.clauses:\n",
        "                return False\n",
@@ -676,7 +706,7 @@
    },
    "outputs": [],
    "source": [
-    "prob = PDDL(knowledge_base, goals, [fly_s_b, fly_b_s, fly_s_c, fly_c_s, fly_b_c, fly_c_b, drive])"
+    "prob = PlanningProblem(knowledge_base, goals, [fly_s_b, fly_b_s, fly_s_c, fly_c_s, fly_b_c, fly_c_b, drive])"
    ]
   },
   {
@@ -793,12 +823,34 @@
        "

\n", "\n", "
def air_cargo():\n",
-       "    """Air cargo problem"""\n",
+       "    """\n",
+       "    [Figure 10.1] AIR-CARGO-PROBLEM\n",
+       "\n",
+       "    An air-cargo shipment problem for delivering cargo to different locations,\n",
+       "    given the starting location and airplanes.\n",
+       "\n",
+       "    Example:\n",
+       "    >>> from planning import *\n",
+       "    >>> ac = air_cargo()\n",
+       "    >>> ac.goal_test()\n",
+       "    False\n",
+       "    >>> ac.act(expr('Load(C2, P2, JFK)'))\n",
+       "    >>> ac.act(expr('Load(C1, P1, SFO)'))\n",
+       "    >>> ac.act(expr('Fly(P1, SFO, JFK)'))\n",
+       "    >>> ac.act(expr('Fly(P2, JFK, SFO)'))\n",
+       "    >>> ac.act(expr('Unload(C2, P2, SFO)'))\n",
+       "    >>> ac.goal_test()\n",
+       "    False\n",
+       "    >>> ac.act(expr('Unload(C1, P1, JFK)'))\n",
+       "    >>> ac.goal_test()\n",
+       "    True\n",
+       "    >>>\n",
+       "    """\n",
        "\n",
-       "    return PDDL(init='At(C1, SFO) & At(C2, JFK) & At(P1, SFO) & At(P2, JFK) & Cargo(C1) & Cargo(C2) & Plane(P1) & Plane(P2) & Airport(SFO) & Airport(JFK)',\n",
-       "                goals='At(C1, JFK) & At(C2, SFO)', \n",
+       "    return PlanningProblem(init='At(C1, SFO) & At(C2, JFK) & At(P1, SFO) & At(P2, JFK) & Cargo(C1) & Cargo(C2) & Plane(P1) & Plane(P2) & Airport(SFO) & Airport(JFK)', \n",
+       "                goals='At(C1, JFK) & At(C2, SFO)',\n",
        "                actions=[Action('Load(c, p, a)', \n",
-       "                                precond='At(c, a) & At(p, a) & Cargo(c) & Plane(p) & Airport(a)', \n",
+       "                                precond='At(c, a) & At(p, a) & Cargo(c) & Plane(p) & Airport(a)',\n",
        "                                effect='In(c, p) & ~At(c, a)'),\n",
        "                         Action('Unload(c, p, a)',\n",
        "                                precond='In(c, p) & At(p, a) & Cargo(c) & Plane(p) & Airport(a)',\n",
@@ -886,7 +938,7 @@
    "metadata": {},
    "source": [
     "It returns False because the goal state is not yet reached. Now, we define the sequence of actions that it should take in order to achieve the goal.\n",
-    "The actions are then carried out on the `airCargo` PDDL.\n",
+    "The actions are then carried out on the `airCargo` PlanningProblem.\n",
     "\n",
     "The actions available to us are the following: Load, Unload, Fly\n",
     "\n",
@@ -1060,9 +1112,27 @@
        "

\n", "\n", "
def spare_tire():\n",
-       "    """Spare tire problem"""\n",
+       "    """[Figure 10.2] SPARE-TIRE-PROBLEM\n",
+       "\n",
+       "    A problem involving changing the flat tire of a car\n",
+       "    with a spare tire from the trunk.\n",
+       "\n",
+       "    Example:\n",
+       "    >>> from planning import *\n",
+       "    >>> st = spare_tire()\n",
+       "    >>> st.goal_test()\n",
+       "    False\n",
+       "    >>> st.act(expr('Remove(Spare, Trunk)'))\n",
+       "    >>> st.act(expr('Remove(Flat, Axle)'))\n",
+       "    >>> st.goal_test()\n",
+       "    False\n",
+       "    >>> st.act(expr('PutOn(Spare, Axle)'))\n",
+       "    >>> st.goal_test()\n",
+       "    True\n",
+       "    >>>\n",
+       "    """\n",
        "\n",
-       "    return PDDL(init='Tire(Flat) & Tire(Spare) & At(Flat, Axle) & At(Spare, Trunk)',\n",
+       "    return PlanningProblem(init='Tire(Flat) & Tire(Spare) & At(Flat, Axle) & At(Spare, Trunk)',\n",
        "                goals='At(Spare, Axle) & At(Flat, Ground)',\n",
        "                actions=[Action('Remove(obj, loc)',\n",
        "                                precond='At(obj, loc)',\n",
@@ -1144,7 +1214,7 @@
    "source": [
     "As we can see, it hasn't completed the goal. \n",
     "We now define a possible solution that can help us reach the goal of having a spare tire mounted onto the car's axle. \n",
-    "The actions are then carried out on the `spareTire` PDDL.\n",
+    "The actions are then carried out on the `spareTire` PlanningProblem.\n",
     "\n",
     "The actions available to us are the following: Remove, PutOn\n",
     "\n",
@@ -1369,9 +1439,28 @@
        "

\n", "\n", "
def three_block_tower():\n",
-       "    """Sussman Anomaly problem"""\n",
+       "    """\n",
+       "    [Figure 10.3] THREE-BLOCK-TOWER\n",
+       "\n",
+       "    A blocks-world problem of stacking three blocks in a certain configuration,\n",
+       "    also known as the Sussman Anomaly.\n",
        "\n",
-       "    return PDDL(init='On(A, Table) & On(B, Table) & On(C, A) & Block(A) & Block(B) & Block(C) & Clear(B) & Clear(C)',\n",
+       "    Example:\n",
+       "    >>> from planning import *\n",
+       "    >>> tbt = three_block_tower()\n",
+       "    >>> tbt.goal_test()\n",
+       "    False\n",
+       "    >>> tbt.act(expr('MoveToTable(C, A)'))\n",
+       "    >>> tbt.act(expr('Move(B, Table, C)'))\n",
+       "    >>> tbt.goal_test()\n",
+       "    False\n",
+       "    >>> tbt.act(expr('Move(A, Table, B)'))\n",
+       "    >>> tbt.goal_test()\n",
+       "    True\n",
+       "    >>>\n",
+       "    """\n",
+       "\n",
+       "    return PlanningProblem(init='On(A, Table) & On(B, Table) & On(C, A) & Block(A) & Block(B) & Block(C) & Clear(B) & Clear(C)',\n",
        "                goals='On(A, B) & On(B, C)',\n",
        "                actions=[Action('Move(b, x, y)',\n",
        "                                precond='On(b, x) & Clear(b) & Clear(y) & Block(b) & Block(y)',\n",
@@ -1453,7 +1542,7 @@
    "source": [
     "As we can see, it hasn't completed the goal. \n",
     "We now define a sequence of actions that can stack three blocks in the required order. \n",
-    "The actions are then carried out on the `threeBlockTower` PDDL.\n",
+    "The actions are then carried out on the `threeBlockTower` PlanningProblem.\n",
     "\n",
     "The actions available to us are the following: MoveToTable, Move\n",
     "\n",
@@ -1513,16 +1602,9 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Shopping Problem"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "This problem requires us to acquire a carton of milk, a banana and a drill.\n",
-    "Initially, we start from home and it is known to us that milk and bananas are available in the supermarket and the hardware store sells drills.\n",
-    "Let's take a look at the definition of the `shopping_problem` in the module."
+    "The `three_block_tower` problem can also be defined in simpler terms using just two actions `ToTable(x, y)` and `FromTable(x, y)`.\n",
+    "The underlying problem remains the same, however: stacking up three blocks in a certain configuration given a particular starting state.\n",
+    "Let's have a look at the alternative definition."
    ]
   },
   {
@@ -1619,17 +1701,35 @@
        "\n",
        "

\n", "\n", - "
def shopping_problem():\n",
-       "    """Shopping problem"""\n",
+       "
def simple_blocks_world():\n",
+       "    """\n",
+       "    SIMPLE-BLOCKS-WORLD\n",
        "\n",
-       "    return PDDL(init='At(Home) & Sells(SM, Milk) & Sells(SM, Banana) & Sells(HW, Drill)',\n",
-       "                goals='Have(Milk) & Have(Banana) & Have(Drill)', \n",
-       "                actions=[Action('Buy(x, store)',\n",
-       "                                precond='At(store) & Sells(store, x)',\n",
-       "                                effect='Have(x)'),\n",
-       "                         Action('Go(x, y)',\n",
-       "                                precond='At(x)',\n",
-       "                                effect='At(y) & ~At(x)')])\n",
+       "    A simplified definition of the Sussman Anomaly problem.\n",
+       "\n",
+       "    Example:\n",
+       "    >>> from planning import *\n",
+       "    >>> sbw = simple_blocks_world()\n",
+       "    >>> sbw.goal_test()\n",
+       "    False\n",
+       "    >>> sbw.act(expr('ToTable(A, B)'))\n",
+       "    >>> sbw.act(expr('FromTable(B, A)'))\n",
+       "    >>> sbw.goal_test()\n",
+       "    False\n",
+       "    >>> sbw.act(expr('FromTable(C, B)'))\n",
+       "    >>> sbw.goal_test()\n",
+       "    True\n",
+       "    >>>\n",
+       "    """\n",
+       "\n",
+       "    return PlanningProblem(init='On(A, B) & Clear(A) & OnTable(B) & OnTable(C) & Clear(C)',\n",
+       "                goals='On(B, A) & On(C, B)',\n",
+       "                actions=[Action('ToTable(x, y)',\n",
+       "                                precond='On(x, y) & Clear(x)',\n",
+       "                                effect='~On(x, y) & Clear(y) & OnTable(x)'),\n",
+       "                         Action('FromTable(y, x)',\n",
+       "                                precond='OnTable(y) & Clear(y) & Clear(x)',\n",
+       "                                effect='~OnTable(y) & ~Clear(x) & On(y, x)')])\n",
        "
\n", "\n", "\n" @@ -1643,20 +1743,26 @@ } ], "source": [ - "psource(shopping_problem)" + "psource(simple_blocks_world)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "**At(x):** Indicates that we are currently at **'x'** where **'x'** can be Home, SM (supermarket) or HW (Hardware store).\n", + "**On(x, y):** The block **'x'** is on **'y'**. Both **'x'** and **'y'** have to be blocks.\n", "\n", - "**~At(x):** Indicates that we are currently _not_ at **'x'**.\n", + "**~On(x, y):** The block **'x'** is _not_ on **'y'**. Both **'x'** and **'y'** have to be blocks.\n", "\n", - "**Sells(s, x):** Indicates that item **'x'** can be bought from store **'s'**.\n", + "**OnTable(x):** The block **'x'** is on the table.\n", "\n", - "**Have(x):** Indicates that we possess the item **'x'**." + "**~OnTable(x):** The block **'x'** is _not_ on the table.\n", + "\n", + "**Clear(x):** To indicate that there is nothing on **'x'** and it is free to be moved around.\n", + "\n", + "**~Clear(x):** To indicate that there is something on **'x'** and it cannot be moved.\n", + "\n", + "Let's now define a `simple_blocks_world` problem." ] }, { @@ -1667,14 +1773,14 @@ }, "outputs": [], "source": [ - "shoppingProblem = shopping_problem()" + "simpleBlocksWorld = simple_blocks_world()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Let's first check whether the goal state Have(Milk), Have(Banana), Have(Drill) is reached or not." + "Before taking any actions, we will see if `simpleBlocksWorld` has reached its goal." 
] }, { @@ -1683,34 +1789,33 @@ "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "False\n" - ] + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "print(shoppingProblem.goal_test())" + "simpleBlocksWorld.goal_test()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Let's look at the possible actions\n", + "As we can see, it hasn't completed the goal. \n", + "We now define a sequence of actions that can stack three blocks in the required order. \n", + "The actions are then carried out on the `simpleBlocksWorld` PlanningProblem.\n", "\n", - "**Buy(x, store):** Buy an item **'x'** from a **'store'** given that the **'store'** sells **'x'**.\n", + "The actions available to us are the following: ToTable, FromTable\n", "\n", - "**Go(x, y):** Go to destination **'y'** starting from source **'x'**." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We now define a valid solution that will help us reach the goal.\n", - "The sequence of actions will then be carried out onto the `shoppingProblem` PDDL." + "**ToTable(x, y):** Move box **'x'** stacked on **'y'** to the table, given that box **'x'** is clear.\n", + "\n", + "**FromTable(x, y):** Move box **'x'** from the table to the top of **'y'**, given that both **'x'** and **'y'** are clear.\n" ] }, { @@ -1721,22 +1826,19 @@ }, "outputs": [], "source": [ - "solution = [expr('Go(Home, SM)'),\n", - " expr('Buy(Milk, SM)'),\n", - " expr('Buy(Banana, SM)'),\n", - " expr('Go(SM, HW)'),\n", - " expr('Buy(Drill, HW)')]\n", + "solution = [expr('ToTable(A, B)'),\n", + " expr('FromTable(B, A)'),\n", + " expr('FromTable(C, B)')]\n", "\n", "for action in solution:\n", - " shoppingProblem.act(action)" + " simpleBlocksWorld.act(action)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "We have taken the steps required to acquire all the stuff we need. 
\n", - "Let's see if we have reached our goal." + "As `simpleBlocksWorld` has taken all the steps it needed in order to achieve the goal, we can now check if it has achieved its goal." ] }, { @@ -1745,40 +1847,38 @@ "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 33, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "True\n" + ] } ], "source": [ - "shoppingProblem.goal_test()" + "print(simpleBlocksWorld.goal_test())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "It has now successfully achieved the goal." + "It has now successfully achieved its goal, i.e., to build a stack of three blocks in the specified order." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Have Cake and Eat Cake Too" + "## Shopping Problem" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "This problem requires us to reach the state of having a cake and having eaten a cake simlutaneously, given a single cake.\n", - "Let's first take a look at the definition of the `have_cake_and_eat_cake_too` problem in the module." + "This problem requires us to acquire a carton of milk, a banana and a drill.\n", + "Initially, we start from home and it is known to us that milk and bananas are available in the supermarket and the hardware store sells drills.\n", + "Let's take a look at the definition of the `shopping_problem` in the module." ] }, { @@ -1875,17 +1975,37 @@ "\n", "

\n", "\n", - "
def have_cake_and_eat_cake_too():\n",
-       "    """Cake problem"""\n",
+       "
def shopping_problem():\n",
+       "    """\n",
+       "    SHOPPING-PROBLEM\n",
        "\n",
-       "    return PDDL(init='Have(Cake)',\n",
-       "                goals='Have(Cake) & Eaten(Cake)',\n",
-       "                actions=[Action('Eat(Cake)',\n",
-       "                                precond='Have(Cake)',\n",
-       "                                effect='Eaten(Cake) & ~Have(Cake)'),\n",
-       "                         Action('Bake(Cake)',\n",
-       "                                precond='~Have(Cake)',\n",
-       "                                effect='Have(Cake)')])\n",
+       "    A problem of acquiring some items given their availability at certain stores.\n",
+       "\n",
+       "    Example:\n",
+       "    >>> from planning import *\n",
+       "    >>> sp = shopping_problem()\n",
+       "    >>> sp.goal_test()\n",
+       "    False\n",
+       "    >>> sp.act(expr('Go(Home, HW)'))\n",
+       "    >>> sp.act(expr('Buy(Drill, HW)'))\n",
+       "    >>> sp.act(expr('Go(HW, SM)'))\n",
+       "    >>> sp.act(expr('Buy(Banana, SM)'))\n",
+       "    >>> sp.goal_test()\n",
+       "    False\n",
+       "    >>> sp.act(expr('Buy(Milk, SM)'))\n",
+       "    >>> sp.goal_test()\n",
+       "    True\n",
+       "    >>>\n",
+       "    """\n",
+       "\n",
+       "    return PlanningProblem(init='At(Home) & Sells(SM, Milk) & Sells(SM, Banana) & Sells(HW, Drill)',\n",
+       "                goals='Have(Milk) & Have(Banana) & Have(Drill)', \n",
+       "                actions=[Action('Buy(x, store)',\n",
+       "                                precond='At(store) & Sells(store, x)',\n",
+       "                                effect='Have(x)'),\n",
+       "                         Action('Go(x, y)',\n",
+       "                                precond='At(x)',\n",
+       "                                effect='At(y) & ~At(x)')])\n",
        "
\n", "\n", "\n" @@ -1899,18 +2019,20 @@ } ], "source": [ - "psource(have_cake_and_eat_cake_too)" + "psource(shopping_problem)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Since this problem doesn't involve variables, states can be considered similar to symbols in propositional logic.\n", + "**At(x):** Indicates that we are currently at **'x'** where **'x'** can be Home, SM (supermarket) or HW (Hardware store).\n", "\n", - "**Have(Cake):** Declares that we have a **'Cake'**.\n", + "**~At(x):** Indicates that we are currently _not_ at **'x'**.\n", "\n", - "**~Have(Cake):** Declares that we _don't_ have a **'Cake'**." + "**Sells(s, x):** Indicates that item **'x'** can be bought from store **'s'**.\n", + "\n", + "**Have(x):** Indicates that we possess the item **'x'**." ] }, { @@ -1921,14 +2043,14 @@ }, "outputs": [], "source": [ - "cakeProblem = have_cake_and_eat_cake_too()" + "shoppingProblem = shopping_problem()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "First let us check whether the goal state 'Have(Cake)' and 'Eaten(Cake)' are reached or not." + "Let's first check whether the goal state Have(Milk), Have(Banana), Have(Drill) is reached or not." ] }, { @@ -1945,26 +2067,26 @@ } ], "source": [ - "print(cakeProblem.goal_test())" + "print(shoppingProblem.goal_test())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Let us look at the possible actions.\n", + "Let's look at the possible actions\n", "\n", - "**Bake(x):** To bake **' x '**.\n", + "**Buy(x, store):** Buy an item **'x'** from a **'store'** given that the **'store'** sells **'x'**.\n", "\n", - "**Eat(x):** To eat **' x '**." + "**Go(x, y):** Go to destination **'y'** starting from source **'x'**." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "We now define a valid solution that can help us reach the goal.\n", - "The sequence of actions will then be acted upon the `cakeProblem` PDDL." 
+ "We now define a valid solution that will help us reach the goal.\n", + "The sequence of actions will then be carried out onto the `shoppingProblem` PlanningProblem." ] }, { @@ -1975,18 +2097,22 @@ }, "outputs": [], "source": [ - "solution = [expr(\"Eat(Cake)\"),\n", - " expr(\"Bake(Cake)\")]\n", + "solution = [expr('Go(Home, SM)'),\n", + " expr('Buy(Milk, SM)'),\n", + " expr('Buy(Banana, SM)'),\n", + " expr('Go(SM, HW)'),\n", + " expr('Buy(Drill, HW)')]\n", "\n", "for action in solution:\n", - " cakeProblem.act(action)" + " shoppingProblem.act(action)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Now we have made actions to bake the cake and eat the cake. Let us check if we have reached the goal." + "We have taken the steps required to acquire all the stuff we need. \n", + "Let's see if we have reached our goal." ] }, { @@ -1995,130 +2121,2779 @@ "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "True\n" - ] + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "print(cakeProblem.goal_test())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "It has now successfully achieved its goal i.e, to have and eat the cake." + "shoppingProblem.goal_test()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "One might wonder if the order of the actions matters for this problem.\n", - "Let's see for ourselves." 
- ] - }, - { - "cell_type": "code", - "execution_count": 39, - "metadata": {}, - "outputs": [ - { - "ename": "Exception", - "evalue": "Action 'Bake(Cake)' pre-conditions not satisfied", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mException\u001b[0m Traceback (most recent call last)", - "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 6\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0maction\u001b[0m \u001b[1;32min\u001b[0m \u001b[0msolution\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 7\u001b[1;33m \u001b[0mcakeProblem\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mact\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0maction\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[1;32m~\\Documents\\Python\\Aima\\aima-python\\planning.py\u001b[0m in \u001b[0;36mact\u001b[1;34m(self, action)\u001b[0m\n\u001b[0;32m 44\u001b[0m \u001b[1;32mraise\u001b[0m \u001b[0mException\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"Action '{}' not found\"\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0maction_name\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 45\u001b[0m \u001b[1;32mif\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mlist_action\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcheck_precond\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0minit\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0margs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 46\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mException\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"Action '{}' pre-conditions not 
satisfied\"\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0maction\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 47\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0minit\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mlist_action\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0minit\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0margs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mclauses\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 48\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", - "\u001b[1;31mException\u001b[0m: Action 'Bake(Cake)' pre-conditions not satisfied" - ] - } - ], - "source": [ - "cakeProblem = have_cake_and_eat_cake_too()\n", - "\n", - "solution = [expr('Bake(Cake)'),\n", - " expr('Eat(Cake)')]\n", - "\n", - "for action in solution:\n", - " cakeProblem.act(action)" + "It has now successfully achieved the goal." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "It raises an exception.\n", - "Indeed, according to the problem, we cannot bake a cake if we already have one.\n", - "In planning terms, '~Have(Cake)' is a precondition to the action 'Bake(Cake)'.\n", - "Hence, this solution is invalid." + "## Socks and Shoes" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## SOLVING PLANNING PROBLEMS\n", - "----\n", - "### GRAPHPLAN\n", - "
\n", - "The GraphPlan algorithm is a popular method of solving classical planning problems.\n", - "Before we get into the details of the algorithm, let's look at a special data structure called **planning graph**, used to give better heuristic estimates and plays a key role in the GraphPlan algorithm." + "This is a simple problem of putting on a pair of socks and shoes.\n", + "The problem is defined in the module as given below." ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 39, "metadata": {}, - "source": [ - "### Planning Graph\n", - "A planning graph is a directed graph organized into levels. \n", - "Each level contains information about the current state of the knowledge base and the possible state-action links to and from that level.\n", - "The first level contains the initial state with nodes representing each fluent that holds in that level.\n", - "This level has state-action links linking each state to valid actions in that state.\n", - "Each action is linked to all its preconditions and its effect states.\n", - "Based on these effects, the next level is constructed.\n", - "The next level contains similarly structured information about the next state.\n", - "In this way, the graph is expanded using state-action links till we reach a state where all the required goals hold true simultaneously.\n", - "We can say that we have reached our goal if none of the goal states in the current level are mutually exclusive.\n", - "This will be explained in detail later.\n", - "
\n", - "Planning graphs only work for propositional planning problems, hence we need to eliminate all variables by generating all possible substitutions.\n", - "
\n", - "For example, the planning graph of the `have_cake_and_eat_cake_too` problem might look like this\n", - "![title](images/cake_graph.jpg)\n", - "
\n", + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + " Codestin Search App\n", + " \n", + " \n", + "\n", + "\n", + "

\n", + "\n", + "
def socks_and_shoes():\n",
+       "    """\n",
+       "    SOCKS-AND-SHOES-PROBLEM\n",
+       "\n",
+       "    A task of wearing socks and shoes on both feet\n",
+       "\n",
+       "    Example:\n",
+       "    >>> from planning import *\n",
+       "    >>> ss = socks_and_shoes()\n",
+       "    >>> ss.goal_test()\n",
+       "    False\n",
+       "    >>> ss.act(expr('RightSock'))\n",
+       "    >>> ss.act(expr('RightShoe'))\n",
+       "    >>> ss.act(expr('LeftSock'))\n",
+       "    >>> ss.goal_test()\n",
+       "    False\n",
+       "    >>> ss.act(expr('LeftShoe'))\n",
+       "    >>> ss.goal_test()\n",
+       "    True\n",
+       "    >>>\n",
+       "    """\n",
+       "\n",
+       "    return PlanningProblem(init='',\n",
+       "                goals='RightShoeOn & LeftShoeOn',\n",
+       "                actions=[Action('RightShoe',\n",
+       "                                precond='RightSockOn',\n",
+       "                                effect='RightShoeOn'),\n",
+       "                        Action('RightSock',\n",
+       "                                precond='',\n",
+       "                                effect='RightSockOn'),\n",
+       "                        Action('LeftShoe',\n",
+       "                                precond='LeftSockOn',\n",
+       "                                effect='LeftShoeOn'),\n",
+       "                        Action('LeftSock',\n",
+       "                                precond='',\n",
+       "                                effect='LeftSockOn')])\n",
+       "
\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "psource(socks_and_shoes)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**LeftSockOn:** Indicates that we have already put on the left sock.\n", + "\n", + "**RightSockOn:** Indicates that we have already put on the right sock.\n", + "\n", + "**LeftShoeOn:** Indicates that we have already put on the left shoe.\n", + "\n", + "**RightShoeOn:** Indicates that we have already put on the right shoe.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "socksShoes = socks_and_shoes()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's first check whether the goal state is reached or not." + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "socksShoes.goal_test()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As the goal state isn't reached, we will define a sequence of actions that might help us achieve the goal.\n", + "These actions will then be acted upon the `socksShoes` PlanningProblem to check if the goal state is reached." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "solution = [expr('RightSock'),\n", + " expr('RightShoe'),\n", + " expr('LeftSock'),\n", + " expr('LeftShoe')]" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "for action in solution:\n", + " socksShoes.act(action)\n", + " \n", + "socksShoes.goal_test()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We have reached our goal." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Cake Problem" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This problem requires us to reach the state of having a cake and having eaten a cake simlutaneously, given a single cake.\n", + "Let's first take a look at the definition of the `have_cake_and_eat_cake_too` problem in the module." + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + " Codestin Search App\n", + " \n", + " \n", + "\n", + "\n", + "

\n", + "\n", + "
def have_cake_and_eat_cake_too():\n",
+       "    """\n",
+       "    [Figure 10.7] CAKE-PROBLEM\n",
+       "\n",
+       "    A problem where we begin with a cake and want to \n",
+       "    reach the state of having a cake and having eaten a cake.\n",
+       "    The possible actions include baking a cake and eating a cake.\n",
+       "\n",
+       "    Example:\n",
+       "    >>> from planning import *\n",
+       "    >>> cp = have_cake_and_eat_cake_too()\n",
+       "    >>> cp.goal_test()\n",
+       "    False\n",
+       "    >>> cp.act(expr('Eat(Cake)'))\n",
+       "    >>> cp.goal_test()\n",
+       "    False\n",
+       "    >>> cp.act(expr('Bake(Cake)'))\n",
+       "    >>> cp.goal_test()\n",
+       "    True\n",
+       "    >>>\n",
+       "    """\n",
+       "\n",
+       "    return PlanningProblem(init='Have(Cake)',\n",
+       "                goals='Have(Cake) & Eaten(Cake)',\n",
+       "                actions=[Action('Eat(Cake)',\n",
+       "                                precond='Have(Cake)',\n",
+       "                                effect='Eaten(Cake) & ~Have(Cake)'),\n",
+       "                         Action('Bake(Cake)',\n",
+       "                                precond='~Have(Cake)',\n",
+       "                                effect='Have(Cake)')])\n",
+       "
\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "psource(have_cake_and_eat_cake_too)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Since this problem doesn't involve variables, states can be considered similar to symbols in propositional logic.\n", + "\n", + "**Have(Cake):** Declares that we have a **'Cake'**.\n", + "\n", + "**~Have(Cake):** Declares that we _don't_ have a **'Cake'**." + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "cakeProblem = have_cake_and_eat_cake_too()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First let us check whether the goal state 'Have(Cake)' and 'Eaten(Cake)' are reached or not." + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "False\n" + ] + } + ], + "source": [ + "print(cakeProblem.goal_test())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let us look at the possible actions.\n", + "\n", + "**Bake(x):** To bake **' x '**.\n", + "\n", + "**Eat(x):** To eat **' x '**." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We now define a valid solution that can help us reach the goal.\n", + "The sequence of actions will then be acted upon the `cakeProblem` PlanningProblem." + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "solution = [expr(\"Eat(Cake)\"),\n", + " expr(\"Bake(Cake)\")]\n", + "\n", + "for action in solution:\n", + " cakeProblem.act(action)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we have made actions to bake the cake and eat the cake. Let us check if we have reached the goal." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "True\n" + ] + } + ], + "source": [ + "print(cakeProblem.goal_test())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "It has now successfully achieved its goal i.e, to have and eat the cake." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "One might wonder if the order of the actions matters for this problem.\n", + "Let's see for ourselves." + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [ + { + "ename": "Exception", + "evalue": "Action 'Bake(Cake)' pre-conditions not satisfied", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mException\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 6\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0maction\u001b[0m \u001b[1;32min\u001b[0m \u001b[0msolution\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 7\u001b[1;33m \u001b[0mcakeProblem\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mact\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0maction\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[1;32m~\\Documents\\Python\\Data Science\\Machine Learning\\Aima\\planning.py\u001b[0m in \u001b[0;36mact\u001b[1;34m(self, action)\u001b[0m\n\u001b[0;32m 58\u001b[0m \u001b[1;32mraise\u001b[0m \u001b[0mException\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"Action '{}' not found\"\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0maction_name\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 59\u001b[0m \u001b[1;32mif\u001b[0m 
\u001b[1;32mnot\u001b[0m \u001b[0mlist_action\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcheck_precond\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0minit\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0margs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 60\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mException\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"Action '{}' pre-conditions not satisfied\"\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0maction\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 61\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0minit\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mlist_action\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0minit\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0margs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mclauses\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 62\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;31mException\u001b[0m: Action 'Bake(Cake)' pre-conditions not satisfied" + ] + } + ], + "source": [ + "cakeProblem = have_cake_and_eat_cake_too()\n", + "\n", + "solution = [expr('Bake(Cake)'),\n", + " expr('Eat(Cake)')]\n", + "\n", + "for action in solution:\n", + " cakeProblem.act(action)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "It raises an exception.\n", + "Indeed, according to the problem, we cannot bake a cake if we already have one.\n", + "In planning terms, '~Have(Cake)' is a precondition to the action 'Bake(Cake)'.\n", + "Hence, this solution is invalid." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## SOLVING PLANNING PROBLEMS\n", + "----\n", + "### GRAPHPLAN\n", + "
\n", + "The GraphPlan algorithm is a popular method of solving classical planning problems.\n", + "Before we get into the details of the algorithm, let's look at a special data structure called a **planning graph**, which is used to give better heuristic estimates and plays a key role in the GraphPlan algorithm." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Planning Graph\n", + "A planning graph is a directed graph organized into levels. \n", + "Each level contains information about the current state of the knowledge base and the possible state-action links to and from that level.\n", + "The first level contains the initial state with nodes representing each fluent that holds in that level.\n", + "This level has state-action links linking each state to valid actions in that state.\n", + "Each action is linked to all its preconditions and its effect states.\n", + "Based on these effects, the next level is constructed.\n", + "The next level contains similarly structured information about the next state.\n", + "In this way, the graph is expanded using state-action links till we reach a state where all the required goals hold true simultaneously.\n", + "We can say that we have reached our goal if none of the goal states in the current level are mutually exclusive.\n", + "This will be explained in detail later.\n", + "
\n", + "Planning graphs only work for propositional planning problems, hence we need to eliminate all variables by generating all possible substitutions.\n", + "
\n", + "For example, the planning graph of the `have_cake_and_eat_cake_too` problem might look like this\n", + "![title](images/cake_graph.jpg)\n", + "
\n", "The black lines indicate links between states and actions.\n", "
\n", - "In every planning problem, we are allowed to carry out the `no-op` action, ie, we can choose no action for a particular state.\n", - "These are called 'Persistence' actions and are represented in the graph by the small square boxes.\n", - "In technical terms, a persistence action has effects same as its preconditions.\n", - "This enables us to carry a state to the next level.\n", + "In every planning problem, we are allowed to carry out the `no-op` action, i.e., we can choose no action for a particular state.\n", + "These are called 'Persistence' actions and are represented in the graph by the small square boxes.\n", + "In technical terms, a persistence action has the same effects as its preconditions.\n", + "This enables us to carry a state to the next level.\n", + "
\n", + "
\n", + "The gray lines indicate mutual exclusivity.\n", + "This means that the actions connected by a gray line cannot be taken together.\n", + "Mutual exclusivity (mutex) occurs in the following cases:\n", + "1. **Inconsistent effects**: One action negates the effect of the other. For example, _Eat(Cake)_ and the persistence of _Have(Cake)_ have inconsistent effects because they disagree on the effect _Have(Cake)_.\n", + "2. **Interference**: One of the effects of an action is the negation of a precondition of the other. For example, _Eat(Cake)_ interferes with the persistence of _Have(Cake)_ by negating its precondition.\n", + "3. **Competing needs**: One of the preconditions of one action is mutually exclusive with a precondition of the other. For example, _Bake(Cake)_ and _Eat(Cake)_ are mutex because they compete on the value of the _Have(Cake)_ precondition." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In the module, planning graphs have been implemented using two classes, `Level` which stores data for a particular level and `Graph` which connects multiple levels together.\n", + "Let's look at the `Level` class." + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + " Codestin Search App\n", + " \n", + " \n", + "\n", + "\n", + "

\n", + "\n", + "
class Level:\n",
+       "    """\n",
+       "    Contains the state of the planning problem\n",
+       "    and exhaustive list of actions which use the\n",
+       "    states as pre-condition.\n",
+       "    """\n",
+       "\n",
+       "    def __init__(self, kb):\n",
+       "        """Initializes variables to hold state and action details of a level"""\n",
+       "\n",
+       "        self.kb = kb\n",
+       "        # current state\n",
+       "        self.current_state = kb.clauses\n",
+       "        # current action to state link\n",
+       "        self.current_action_links = {}\n",
+       "        # current state to action link\n",
+       "        self.current_state_links = {}\n",
+       "        # current action to next state link\n",
+       "        self.next_action_links = {}\n",
+       "        # next state to current action link\n",
+       "        self.next_state_links = {}\n",
+       "        # mutually exclusive actions\n",
+       "        self.mutex = []\n",
+       "\n",
+       "    def __call__(self, actions, objects):\n",
+       "        self.build(actions, objects)\n",
+       "        self.find_mutex()\n",
+       "\n",
+       "    def separate(self, e):\n",
+       "        """Separates an iterable of elements into positive and negative parts"""\n",
+       "\n",
+       "        positive = []\n",
+       "        negative = []\n",
+       "        for clause in e:\n",
+       "            if clause.op[:3] == 'Not':\n",
+       "                negative.append(clause)\n",
+       "            else:\n",
+       "                positive.append(clause)\n",
+       "        return positive, negative\n",
+       "\n",
+       "    def find_mutex(self):\n",
+       "        """Finds mutually exclusive actions"""\n",
+       "\n",
+       "        # Inconsistent effects\n",
+       "        pos_nsl, neg_nsl = self.separate(self.next_state_links)\n",
+       "\n",
+       "        for negeff in neg_nsl:\n",
+       "            new_negeff = Expr(negeff.op[3:], *negeff.args)\n",
+       "            for poseff in pos_nsl:\n",
+       "                if new_negeff == poseff:\n",
+       "                    for a in self.next_state_links[poseff]:\n",
+       "                        for b in self.next_state_links[negeff]:\n",
+       "                            if {a, b} not in self.mutex:\n",
+       "                                self.mutex.append({a, b})\n",
+       "\n",
+       "        # Interference will be calculated with the last step\n",
+       "        pos_csl, neg_csl = self.separate(self.current_state_links)\n",
+       "\n",
+       "        # Competing needs\n",
+       "        for posprecond in pos_csl:\n",
+       "            for negprecond in neg_csl:\n",
+       "                new_negprecond = Expr(negprecond.op[3:], *negprecond.args)\n",
+       "                if new_negprecond == posprecond:\n",
+       "                    for a in self.current_state_links[posprecond]:\n",
+       "                        for b in self.current_state_links[negprecond]:\n",
+       "                            if {a, b} not in self.mutex:\n",
+       "                                self.mutex.append({a, b})\n",
+       "\n",
+       "        # Inconsistent support\n",
+       "        state_mutex = []\n",
+       "        for pair in self.mutex:\n",
+       "            next_state_0 = self.next_action_links[list(pair)[0]]\n",
+       "            if len(pair) == 2:\n",
+       "                next_state_1 = self.next_action_links[list(pair)[1]]\n",
+       "            else:\n",
+       "                next_state_1 = self.next_action_links[list(pair)[0]]\n",
+       "            if (len(next_state_0) == 1) and (len(next_state_1) == 1):\n",
+       "                state_mutex.append({next_state_0[0], next_state_1[0]})\n",
+       "        \n",
+       "        self.mutex = self.mutex + state_mutex\n",
+       "\n",
+       "    def build(self, actions, objects):\n",
+       "        """Populates the lists and dictionaries containing the state action dependencies"""\n",
+       "\n",
+       "        for clause in self.current_state:\n",
+       "            p_expr = Expr('P' + clause.op, *clause.args)\n",
+       "            self.current_action_links[p_expr] = [clause]\n",
+       "            self.next_action_links[p_expr] = [clause]\n",
+       "            self.current_state_links[clause] = [p_expr]\n",
+       "            self.next_state_links[clause] = [p_expr]\n",
+       "\n",
+       "        for a in actions:\n",
+       "            num_args = len(a.args)\n",
+       "            possible_args = tuple(itertools.permutations(objects, num_args))\n",
+       "\n",
+       "            for arg in possible_args:\n",
+       "                if a.check_precond(self.kb, arg):\n",
+       "                    for num, symbol in enumerate(a.args):\n",
+       "                        if not symbol.op.islower():\n",
+       "                            arg = list(arg)\n",
+       "                            arg[num] = symbol\n",
+       "                            arg = tuple(arg)\n",
+       "\n",
+       "                    new_action = a.substitute(Expr(a.name, *a.args), arg)\n",
+       "                    self.current_action_links[new_action] = []\n",
+       "\n",
+       "                    for clause in a.precond:\n",
+       "                        new_clause = a.substitute(clause, arg)\n",
+       "                        self.current_action_links[new_action].append(new_clause)\n",
+       "                        if new_clause in self.current_state_links:\n",
+       "                            self.current_state_links[new_clause].append(new_action)\n",
+       "                        else:\n",
+       "                            self.current_state_links[new_clause] = [new_action]\n",
+       "                   \n",
+       "                    self.next_action_links[new_action] = []\n",
+       "                    for clause in a.effect:\n",
+       "                        new_clause = a.substitute(clause, arg)\n",
+       "\n",
+       "                        self.next_action_links[new_action].append(new_clause)\n",
+       "                        if new_clause in self.next_state_links:\n",
+       "                            self.next_state_links[new_clause].append(new_action)\n",
+       "                        else:\n",
+       "                            self.next_state_links[new_clause] = [new_action]\n",
+       "\n",
+       "    def perform_actions(self):\n",
+       "        """Performs the necessary actions and returns a new Level"""\n",
+       "\n",
+       "        new_kb = FolKB(list(set(self.next_state_links.keys())))\n",
+       "        return Level(new_kb)\n",
+       "
\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "psource(Level)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Each level stores the following data\n", + "1. The current state of the level in `current_state`\n", + "2. Links from an action to its preconditions in `current_action_links`\n", + "3. Links from a state to the possible actions in that state in `current_state_links`\n", + "4. Links from each action to its effects in `next_action_links`\n", + "5. Links from each possible next state to the actions that produce it in `next_state_links`. This stores the same information as the `current_action_links` of the next level.\n", + "6. Mutex links in `mutex`.\n", + "
\n", + "
\n", + "The `find_mutex` method finds the mutex links according to the points given above.\n", + "
\n", + "The `build` method populates the data structures storing the state and action information.\n", + "Persistence actions for each clause in the current state are also defined here. \n", + "The newly created persistence action has the same name as its state, prefixed with a 'P'." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's now look at the `Graph` class." + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + " Codestin Search App\n", + " \n", + " \n", + "\n", + "\n", + "

\n", + "\n", + "
class Graph:\n",
+       "    """\n",
+       "    Contains levels of state and actions\n",
+       "    Used in graph planning algorithm to extract a solution\n",
+       "    """\n",
+       "\n",
+       "    def __init__(self, pddl):\n",
+       "        self.pddl = pddl\n",
+       "        self.kb = FolKB(pddl.init)\n",
+       "        self.levels = [Level(self.kb)]\n",
+       "        self.objects = set(arg for clause in self.kb.clauses for arg in clause.args)\n",
+       "\n",
+       "    def __call__(self):\n",
+       "        self.expand_graph()\n",
+       "\n",
+       "    def expand_graph(self):\n",
+       "        """Expands the graph by a level"""\n",
+       "\n",
+       "        last_level = self.levels[-1]\n",
+       "        last_level(self.pddl.actions, self.objects)\n",
+       "        self.levels.append(last_level.perform_actions())\n",
+       "\n",
+       "    def non_mutex_goals(self, goals, index):\n",
+       "        """Checks whether the goals are mutually exclusive"""\n",
+       "\n",
+       "        goal_perm = itertools.combinations(goals, 2)\n",
+       "        for g in goal_perm:\n",
+       "            if set(g) in self.levels[index].mutex:\n",
+       "                return False\n",
+       "        return True\n",
+       "
\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "psource(Graph)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The class stores a problem definition in `pddl`, \n", + "a knowledge base in `kb`, \n", + "a list of `Level` objects in `levels` and \n", + "all the possible arguments found in the initial state of the problem in `objects`.\n", + "
\n", + "The `expand_graph` method generates a new level of the graph.\n", + "This method is invoked when the goal conditions haven't been met in the current level or the actions that lead to it are mutually exclusive.\n", + "The `non_mutex_goals` method checks whether the goals in the current state are mutually exclusive.\n", + "
\n", + "
\n", + "Using these two classes, we can define a planning graph which can either be used to provide reliable heuristics for planning problems or used in the `GraphPlan` algorithm.\n", + "
\n", + "Let's have a look at the `GraphPlan` class." + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + " Codestin Search App\n", + " \n", + " \n", + "\n", + "\n", + "

\n", + "\n", + "
class GraphPlan:\n",
+       "    """\n",
+       "    Class for formulation GraphPlan algorithm\n",
+       "    Constructs a graph of state and action space\n",
+       "    Returns solution for the planning problem\n",
+       "    """\n",
+       "\n",
+       "    def __init__(self, pddl):\n",
+       "        self.graph = Graph(pddl)\n",
+       "        self.nogoods = []\n",
+       "        self.solution = []\n",
+       "\n",
+       "    def check_leveloff(self):\n",
+       "        """Checks if the graph has levelled off"""\n",
+       "\n",
+       "        check = (set(self.graph.levels[-1].current_state) == set(self.graph.levels[-2].current_state))\n",
+       "\n",
+       "        if check:\n",
+       "            return True\n",
+       "\n",
+       "    def extract_solution(self, goals, index):\n",
+       "        """Extracts the solution"""\n",
+       "\n",
+       "        level = self.graph.levels[index]    \n",
+       "        if not self.graph.non_mutex_goals(goals, index):\n",
+       "            self.nogoods.append((level, goals))\n",
+       "            return\n",
+       "\n",
+       "        level = self.graph.levels[index - 1]    \n",
+       "\n",
+       "        # Create all combinations of actions that satisfy the goal    \n",
+       "        actions = []\n",
+       "        for goal in goals:\n",
+       "            actions.append(level.next_state_links[goal])    \n",
+       "\n",
+       "        all_actions = list(itertools.product(*actions))    \n",
+       "\n",
+       "        # Filter out non-mutex actions\n",
+       "        non_mutex_actions = []    \n",
+       "        for action_tuple in all_actions:\n",
+       "            action_pairs = itertools.combinations(list(set(action_tuple)), 2)        \n",
+       "            non_mutex_actions.append(list(set(action_tuple)))        \n",
+       "            for pair in action_pairs:            \n",
+       "                if set(pair) in level.mutex:\n",
+       "                    non_mutex_actions.pop(-1)\n",
+       "                    break\n",
+       "    \n",
+       "\n",
+       "        # Recursion\n",
+       "        for action_list in non_mutex_actions:        \n",
+       "            if [action_list, index] not in self.solution:\n",
+       "                self.solution.append([action_list, index])\n",
+       "\n",
+       "                new_goals = []\n",
+       "                for act in set(action_list):                \n",
+       "                    if act in level.current_action_links:\n",
+       "                        new_goals = new_goals + level.current_action_links[act]\n",
+       "\n",
+       "                if abs(index) + 1 == len(self.graph.levels):\n",
+       "                    return\n",
+       "                elif (level, new_goals) in self.nogoods:\n",
+       "                    return\n",
+       "                else:\n",
+       "                    self.extract_solution(new_goals, index - 1)\n",
+       "\n",
+       "        # Level-Order multiple solutions\n",
+       "        solution = []\n",
+       "        for item in self.solution:\n",
+       "            if item[1] == -1:\n",
+       "                solution.append([])\n",
+       "                solution[-1].append(item[0])\n",
+       "            else:\n",
+       "                solution[-1].append(item[0])\n",
+       "\n",
+       "        for num, item in enumerate(solution):\n",
+       "            item.reverse()\n",
+       "            solution[num] = item\n",
+       "\n",
+       "        return solution\n",
+       "\n",
+       "    def goal_test(self, kb):\n",
+       "        return all(kb.ask(q) is not False for q in self.graph.pddl.goals)\n",
+       "\n",
+       "    def execute(self):\n",
+       "        """Executes the GraphPlan algorithm for the given problem"""\n",
+       "\n",
+       "        while True:\n",
+       "            self.graph.expand_graph()\n",
+       "            if (self.goal_test(self.graph.levels[-1].kb) and self.graph.non_mutex_goals(self.graph.pddl.goals, -1)):\n",
+       "                solution = self.extract_solution(self.graph.pddl.goals, -1)\n",
+       "                if solution:\n",
+       "                    return solution\n",
+       "            \n",
+       "            if len(self.graph.levels) >= 2 and self.check_leveloff():\n",
+       "                return None\n",
+       "
\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "psource(GraphPlan)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Given a planning problem defined as a PlanningProblem, `GraphPlan` creates a planning graph stored in `graph` and expands it till it reaches a state where all its required goals are present simultaneously without mutual exclusivity.\n", + "
\n", + "Once a goal is found, `extract_solution` is called.\n", + "This method recursively finds the path to a solution given a planning graph.\n", + "In the case where `extract_solution` fails to find a solution for a set of goals at a given level, we record the `(level, goals)` pair as a **no-good**.\n", + "Whenever `extract_solution` is called again with the same level and goals, we can find the recorded no-good and immediately return failure rather than searching again. \n", + "No-goods are also used in the termination test.\n", + "
\n", + "The `check_leveloff` method checks if the planning graph for the problem has **levelled-off**, i.e., it has the same states, actions and mutex pairs as the previous level.\n", + "If the graph has already levelled off and we haven't found a solution, there is no point expanding the graph, as it won't lead to anything new.\n", + "In such a case, we can declare that the planning problem is unsolvable with the given constraints.\n", + "
\n", + "
\n", + "To summarize, the `GraphPlan` algorithm calls `expand_graph` and tests whether it has reached the goal and if the goals are non-mutex.\n", + "
\n", + "If so, `extract_solution` is invoked which recursively reconstructs the solution from the planning graph.\n", + "
\n", + "If not, then we check if our graph has levelled off and continue if it hasn't." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's solve a few planning problems that we had defined earlier." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Air cargo problem\n", + "In accordance with the summary above, we have defined a helper function to carry out `GraphPlan` on the `air_cargo` problem.\n", + "The function is pretty straightforward.\n", + "Let's have a look." + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + " Codestin Search App\n", + " \n", + " \n", + "\n", + "\n", + "

\n", + "\n", + "
def air_cargo_graphplan():\n",
+       "    """Solves the air cargo problem using GraphPlan"""\n",
+       "    return GraphPlan(air_cargo()).execute()\n",
+       "
\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "psource(air_cargo_graphplan)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's instantiate the problem and find a solution using this helper function." + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[[[Load(C2, P2, JFK),\n", + " Fly(P2, JFK, SFO),\n", + " Load(C1, P1, SFO),\n", + " Fly(P1, SFO, JFK),\n", + " PCargo(C1),\n", + " PAirport(JFK),\n", + " PPlane(P2),\n", + " PAirport(SFO),\n", + " PPlane(P1),\n", + " PCargo(C2)],\n", + " [Unload(C2, P2, SFO), Unload(C1, P1, JFK)]]]" + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "airCargoG = air_cargo_graphplan()\n", + "airCargoG" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Each element in the solution is a valid action.\n", + "The solution is separated into lists for each level.\n", + "The actions prefixed with a 'P' are persistence actions and can be ignored.\n", + "They simply carry certain states forward.\n", + "We have another helper function `linearize` that presents the solution in a more readable format, much like a total-order planner, but it is _not_ a total-order planner." + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[Load(C2, P2, JFK),\n", + " Fly(P2, JFK, SFO),\n", + " Load(C1, P1, SFO),\n", + " Fly(P1, SFO, JFK),\n", + " Unload(C2, P2, SFO),\n", + " Unload(C1, P1, JFK)]" + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "linearize(airCargoG)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Indeed, this is a correct solution.\n", + "
\n", + "There are similar helper functions for some other planning problems.\n", + "
\n", + "Let's try solving the spare tire problem." + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[Remove(Flat, Axle), Remove(Spare, Trunk), PutOn(Spare, Axle)]" + ] + }, + "execution_count": 56, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "spareTireG = spare_tire_graphplan()\n", + "linearize(spareTireG)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Solution for the cake problem" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[Eat(Cake), Bake(Cake)]" + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cakeProblemG = have_cake_and_eat_cake_too_graphplan()\n", + "linearize(cakeProblemG)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Solution for the Sussman's Anomaly configuration of three blocks." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[MoveToTable(C, A), Move(B, Table, C), Move(A, Table, B)]" + ] + }, + "execution_count": 58, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sussmanAnomalyG = three_block_tower_graphplan()\n", + "linearize(sussmanAnomalyG)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Solution of the socks and shoes problem" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[LeftSock, RightSock, LeftShoe, RightShoe]" + ] + }, + "execution_count": 59, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "socksShoesG = socks_and_shoes_graphplan()\n", + "linearize(socksShoesG)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### TOTAL ORDER PLANNER\n", + "\n", + "In mathematical terminology, **total order**, **linear order** or **simple order** refers to a set *X* which is said to be totally ordered under ≤ if the following statements hold for all *a*, *b* and *c* in *X*:\n", + "
\n", + "If *a* ≤ *b* and *b* ≤ *a*, then *a* = *b* (antisymmetry).\n", + "
\n", + "If *a* ≤ *b* and *b* ≤ *c*, then *a* ≤ *c* (transitivity).\n", + "
\n", + "*a* ≤ *b* or *b* ≤ *a* (connex relation).\n", + "\n", + "
\n", + "In simpler terms, a total order plan is a linear ordering of actions to be taken to reach the goal state.\n", + "There may be several different total-order plans for a particular goal depending on the problem.\n", + "
\n", + "
\n", + "In the module, the `Linearize` class solves problems using this paradigm.\n", + "At its core, the `Linearize` class uses a solved planning graph from `GraphPlan` and finds a valid total-order solution for it.\n", + "Let's have a look at the class." + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + " Codestin Search App\n", + " \n", + " \n", + "\n", + "\n", + "

\n", + "\n", + "
class Linearize:\n",
+       "\n",
+       "    def __init__(self, pddl):\n",
+       "        self.pddl = pddl\n",
+       "\n",
+       "    def filter(self, solution):\n",
+       "        """Filter out persistence actions from a solution"""\n",
+       "\n",
+       "        new_solution = []\n",
+       "        for section in solution[0]:\n",
+       "            new_section = []\n",
+       "            for operation in section:\n",
+       "                if not (operation.op[0] == 'P' and operation.op[1].isupper()):\n",
+       "                    new_section.append(operation)\n",
+       "            new_solution.append(new_section)\n",
+       "        return new_solution\n",
+       "\n",
+       "    def orderlevel(self, level, pddl):\n",
+       "        """Return valid linear order of actions for a given level"""\n",
+       "\n",
+       "        for permutation in itertools.permutations(level):\n",
+       "            temp = copy.deepcopy(pddl)\n",
+       "            count = 0\n",
+       "            for action in permutation:\n",
+       "                try:\n",
+       "                    temp.act(action)\n",
+       "                    count += 1\n",
+       "                except:\n",
+       "                    count = 0\n",
+       "                    temp = copy.deepcopy(pddl)\n",
+       "                    break\n",
+       "            if count == len(permutation):\n",
+       "                return list(permutation), temp\n",
+       "        return None\n",
+       "\n",
+       "    def execute(self):\n",
+       "        """Finds total-order solution for a planning graph"""\n",
+       "\n",
+       "        graphplan_solution = GraphPlan(self.pddl).execute()\n",
+       "        filtered_solution = self.filter(graphplan_solution)\n",
+       "        ordered_solution = []\n",
+       "        pddl = self.pddl\n",
+       "        for level in filtered_solution:\n",
+       "            level_solution, pddl = self.orderlevel(level, pddl)\n",
+       "            for element in level_solution:\n",
+       "                ordered_solution.append(element)\n",
+       "\n",
+       "        return ordered_solution\n",
+       "
\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "psource(Linearize)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `filter` method removes the persistence actions (if any) from the planning graph representation.\n", + "
\n", + "The `orderlevel` method finds a valid total-ordering of a specified level of the planning-graph, given the state of the graph after the previous level.\n", + "
\n", + "The `execute` method sequentially calls `orderlevel` for all the levels in the planning-graph and returns the final total-order solution.\n", + "
\n", + "
\n", + "Let's look at some examples." + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[Load(C2, P2, JFK),\n", + " Fly(P2, JFK, SFO),\n", + " Load(C1, P1, SFO),\n", + " Fly(P1, SFO, JFK),\n", + " Unload(C2, P2, SFO),\n", + " Unload(C1, P1, JFK)]" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# total-order solution for air_cargo problem\n", + "Linearize(air_cargo()).execute()" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[Remove(Flat, Axle), Remove(Spare, Trunk), PutOn(Spare, Axle)]" + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# total-order solution for spare_tire problem\n", + "Linearize(spare_tire()).execute()" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[MoveToTable(C, A), Move(B, Table, C), Move(A, Table, B)]" + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# total-order solution for three_block_tower problem\n", + "Linearize(three_block_tower()).execute()" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[ToTable(A, B), FromTable(B, A), FromTable(C, B)]" + ] + }, + "execution_count": 64, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# total-order solution for simple_blocks_world problem\n", + "Linearize(simple_blocks_world()).execute()" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[LeftSock, RightSock, LeftShoe, RightShoe]" + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + } + ], + 
"source": [ + "# total-order solution for socks_and_shoes problem\n", + "Linearize(socks_and_shoes()).execute()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### PARTIAL ORDER PLANNER\n", + "A partial-order planning algorithm is significantly different from a total-order planner.\n", + "The way a partial-order plan works enables it to take advantage of _problem decomposition_ and work on each subproblem separately.\n", + "It works on several subgoals independently, solves them with several subplans, and then combines the plan.\n", + "
\n", + "A partial-order planner also follows the **least commitment** strategy, where it delays making choices for as long as possible.\n", + "Variables are not bound unless it is absolutely necessary and new actions are chosen only if the existing actions cannot fulfil the required precondition.\n", + "
\n", + "Any planning algorithm that can place two actions into a plan without specifying which comes first is called a **partial-order planner**.\n", + "A partial-order planner searches through the space of plans rather than the space of states, which makes it perform better for certain problems.\n", + "
\n", + "
\n", + "Let's have a look at the `PartialOrderPlanner` class." + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + " Codestin Search App\n", + " \n", + " \n", + "\n", + "\n", + "

\n", + "\n", + "
class PartialOrderPlanner:\n",
+       "\n",
+       "    def __init__(self, pddl):\n",
+       "        self.pddl = pddl\n",
+       "        self.initialize()\n",
+       "\n",
+       "    def initialize(self):\n",
+       "        """Initialize all variables"""\n",
+       "        self.causal_links = []\n",
+       "        self.start = Action('Start', [], self.pddl.init)\n",
+       "        self.finish = Action('Finish', self.pddl.goals, [])\n",
+       "        self.actions = set()\n",
+       "        self.actions.add(self.start)\n",
+       "        self.actions.add(self.finish)\n",
+       "        self.constraints = set()\n",
+       "        self.constraints.add((self.start, self.finish))\n",
+       "        self.agenda = set()\n",
+       "        for precond in self.finish.precond:\n",
+       "            self.agenda.add((precond, self.finish))\n",
+       "        self.expanded_actions = self.expand_actions()\n",
+       "\n",
+       "    def expand_actions(self, name=None):\n",
+       "        """Generate all possible actions with variable bindings for precondition selection heuristic"""\n",
+       "\n",
+       "        objects = set(arg for clause in self.pddl.init for arg in clause.args)\n",
+       "        expansions = []\n",
+       "        action_list = []\n",
+       "        if name is not None:\n",
+       "            for action in self.pddl.actions:\n",
+       "                if str(action.name) == name:\n",
+       "                    action_list.append(action)\n",
+       "        else:\n",
+       "            action_list = self.pddl.actions\n",
+       "\n",
+       "        for action in action_list:\n",
+       "            for permutation in itertools.permutations(objects, len(action.args)):\n",
+       "                bindings = unify(Expr(action.name, *action.args), Expr(action.name, *permutation))\n",
+       "                if bindings is not None:\n",
+       "                    new_args = []\n",
+       "                    for arg in action.args:\n",
+       "                        if arg in bindings:\n",
+       "                            new_args.append(bindings[arg])\n",
+       "                        else:\n",
+       "                            new_args.append(arg)\n",
+       "                    new_expr = Expr(str(action.name), *new_args)\n",
+       "                    new_preconds = []\n",
+       "                    for precond in action.precond:\n",
+       "                        new_precond_args = []\n",
+       "                        for arg in precond.args:\n",
+       "                            if arg in bindings:\n",
+       "                                new_precond_args.append(bindings[arg])\n",
+       "                            else:\n",
+       "                                new_precond_args.append(arg)\n",
+       "                        new_precond = Expr(str(precond.op), *new_precond_args)\n",
+       "                        new_preconds.append(new_precond)\n",
+       "                    new_effects = []\n",
+       "                    for effect in action.effect:\n",
+       "                        new_effect_args = []\n",
+       "                        for arg in effect.args:\n",
+       "                            if arg in bindings:\n",
+       "                                new_effect_args.append(bindings[arg])\n",
+       "                            else:\n",
+       "                                new_effect_args.append(arg)\n",
+       "                        new_effect = Expr(str(effect.op), *new_effect_args)\n",
+       "                        new_effects.append(new_effect)\n",
+       "                    expansions.append(Action(new_expr, new_preconds, new_effects))\n",
+       "\n",
+       "        return expansions\n",
+       "\n",
+       "    def find_open_precondition(self):\n",
+       "        """Find open precondition with the least number of possible actions"""\n",
+       "\n",
+       "        number_of_ways = dict()\n",
+       "        actions_for_precondition = dict()\n",
+       "        for element in self.agenda:\n",
+       "            open_precondition = element[0]\n",
+       "            possible_actions = list(self.actions) + self.expanded_actions\n",
+       "            for action in possible_actions:\n",
+       "                for effect in action.effect:\n",
+       "                    if effect == open_precondition:\n",
+       "                        if open_precondition in number_of_ways:\n",
+       "                            number_of_ways[open_precondition] += 1\n",
+       "                            actions_for_precondition[open_precondition].append(action)\n",
+       "                        else:\n",
+       "                            number_of_ways[open_precondition] = 1\n",
+       "                            actions_for_precondition[open_precondition] = [action]\n",
+       "\n",
+       "        number = sorted(number_of_ways, key=number_of_ways.__getitem__)\n",
+       "        \n",
+       "        for k, v in number_of_ways.items():\n",
+       "            if v == 0:\n",
+       "                return None, None, None\n",
+       "\n",
+       "        act1 = None\n",
+       "        for element in self.agenda:\n",
+       "            if element[0] == number[0]:\n",
+       "                act1 = element[1]\n",
+       "                break\n",
+       "\n",
+       "        if number[0] in self.expanded_actions:\n",
+       "            self.expanded_actions.remove(number[0])\n",
+       "\n",
+       "        return number[0], act1, actions_for_precondition[number[0]]\n",
+       "\n",
+       "    def find_action_for_precondition(self, oprec):\n",
+       "        """Find action for a given precondition"""\n",
+       "\n",
+       "        # either\n",
+       "        #   choose act0 E Actions such that act0 achieves G\n",
+       "        for action in self.actions:\n",
+       "            for effect in action.effect:\n",
+       "                if effect == oprec:\n",
+       "                    return action, 0\n",
+       "\n",
+       "        # or\n",
+       "        #   choose act0 E Actions such that act0 achieves G\n",
+       "        for action in self.pddl.actions:\n",
+       "            for effect in action.effect:\n",
+       "                if effect.op == oprec.op:\n",
+       "                    bindings = unify(effect, oprec)\n",
+       "                    if bindings is None:\n",
+       "                        break\n",
+       "                    return action, bindings\n",
+       "\n",
+       "    def generate_expr(self, clause, bindings):\n",
+       "        """Generate atomic expression from generic expression given variable bindings"""\n",
+       "\n",
+       "        new_args = []\n",
+       "        for arg in clause.args:\n",
+       "            if arg in bindings:\n",
+       "                new_args.append(bindings[arg])\n",
+       "            else:\n",
+       "                new_args.append(arg)\n",
+       "\n",
+       "        try:\n",
+       "            return Expr(str(clause.name), *new_args)\n",
+       "        except:\n",
+       "            return Expr(str(clause.op), *new_args)\n",
+       "        \n",
+       "    def generate_action_object(self, action, bindings):\n",
+       "        """Generate action object given a generic action andvariable bindings"""\n",
+       "\n",
+       "        # if bindings is 0, it means the action already exists in self.actions\n",
+       "        if bindings == 0:\n",
+       "            return action\n",
+       "\n",
+       "        # bindings cannot be None\n",
+       "        else:\n",
+       "            new_expr = self.generate_expr(action, bindings)\n",
+       "            new_preconds = []\n",
+       "            for precond in action.precond:\n",
+       "                new_precond = self.generate_expr(precond, bindings)\n",
+       "                new_preconds.append(new_precond)\n",
+       "            new_effects = []\n",
+       "            for effect in action.effect:\n",
+       "                new_effect = self.generate_expr(effect, bindings)\n",
+       "                new_effects.append(new_effect)\n",
+       "            return Action(new_expr, new_preconds, new_effects)\n",
+       "\n",
+       "    def cyclic(self, graph):\n",
+       "        """Check cyclicity of a directed graph"""\n",
+       "\n",
+       "        new_graph = dict()\n",
+       "        for element in graph:\n",
+       "            if element[0] in new_graph:\n",
+       "                new_graph[element[0]].append(element[1])\n",
+       "            else:\n",
+       "                new_graph[element[0]] = [element[1]]\n",
+       "\n",
+       "        path = set()\n",
+       "\n",
+       "        def visit(vertex):\n",
+       "            path.add(vertex)\n",
+       "            for neighbor in new_graph.get(vertex, ()):\n",
+       "                if neighbor in path or visit(neighbor):\n",
+       "                    return True\n",
+       "            path.remove(vertex)\n",
+       "            return False\n",
+       "\n",
+       "        value = any(visit(v) for v in new_graph)\n",
+       "        return value\n",
+       "\n",
+       "    def add_const(self, constraint, constraints):\n",
+       "        """Add the constraint to constraints if the resulting graph is acyclic"""\n",
+       "\n",
+       "        if constraint[0] == self.finish or constraint[1] == self.start:\n",
+       "            return constraints\n",
+       "\n",
+       "        new_constraints = set(constraints)\n",
+       "        new_constraints.add(constraint)\n",
+       "\n",
+       "        if self.cyclic(new_constraints):\n",
+       "            return constraints\n",
+       "        return new_constraints\n",
+       "\n",
+       "    def is_a_threat(self, precondition, effect):\n",
+       "        """Check if effect is a threat to precondition"""\n",
+       "\n",
+       "        if (str(effect.op) == 'Not' + str(precondition.op)) or ('Not' + str(effect.op) == str(precondition.op)):\n",
+       "            if effect.args == precondition.args:\n",
+       "                return True\n",
+       "        return False\n",
+       "\n",
+       "    def protect(self, causal_link, action, constraints):\n",
+       "        """Check and resolve threats by promotion or demotion"""\n",
+       "\n",
+       "        threat = False\n",
+       "        for effect in action.effect:\n",
+       "            if self.is_a_threat(causal_link[1], effect):\n",
+       "                threat = True\n",
+       "                break\n",
+       "\n",
+       "        if action != causal_link[0] and action != causal_link[2] and threat:\n",
+       "            # try promotion\n",
+       "            new_constraints = set(constraints)\n",
+       "            new_constraints.add((action, causal_link[0]))\n",
+       "            if not self.cyclic(new_constraints):\n",
+       "                constraints = self.add_const((action, causal_link[0]), constraints)\n",
+       "            else:\n",
+       "                # try demotion\n",
+       "                new_constraints = set(constraints)\n",
+       "                new_constraints.add((causal_link[2], action))\n",
+       "                if not self.cyclic(new_constraints):\n",
+       "                    constraints = self.add_const((causal_link[2], action), constraints)\n",
+       "                else:\n",
+       "                    # both promotion and demotion fail\n",
+       "                    print('Unable to resolve a threat caused by', action, 'onto', causal_link)\n",
+       "                    return\n",
+       "        return constraints\n",
+       "\n",
+       "    def convert(self, constraints):\n",
+       "        """Convert constraints into a dict of Action to set orderings"""\n",
+       "\n",
+       "        graph = dict()\n",
+       "        for constraint in constraints:\n",
+       "            if constraint[0] in graph:\n",
+       "                graph[constraint[0]].add(constraint[1])\n",
+       "            else:\n",
+       "                graph[constraint[0]] = set()\n",
+       "                graph[constraint[0]].add(constraint[1])\n",
+       "        return graph\n",
+       "\n",
+       "    def toposort(self, graph):\n",
+       "        """Generate topological ordering of constraints"""\n",
+       "\n",
+       "        if len(graph) == 0:\n",
+       "            return\n",
+       "\n",
+       "        graph = graph.copy()\n",
+       "\n",
+       "        for k, v in graph.items():\n",
+       "            v.discard(k)\n",
+       "\n",
+       "        extra_elements_in_dependencies = _reduce(set.union, graph.values()) - set(graph.keys())\n",
+       "\n",
+       "        graph.update({element:set() for element in extra_elements_in_dependencies})\n",
+       "        while True:\n",
+       "            ordered = set(element for element, dependency in graph.items() if len(dependency) == 0)\n",
+       "            if not ordered:\n",
+       "                break\n",
+       "            yield ordered\n",
+       "            graph = {element: (dependency - ordered) for element, dependency in graph.items() if element not in ordered}\n",
+       "        if len(graph) != 0:\n",
+       "            raise ValueError('The graph is not acyclic and cannot be linearly ordered')\n",
+       "\n",
+       "    def display_plan(self):\n",
+       "        """Display causal links, constraints and the plan"""\n",
+       "\n",
+       "        print('Causal Links')\n",
+       "        for causal_link in self.causal_links:\n",
+       "            print(causal_link)\n",
+       "\n",
+       "        print('\\nConstraints')\n",
+       "        for constraint in self.constraints:\n",
+       "            print(constraint[0], '<', constraint[1])\n",
+       "\n",
+       "        print('\\nPartial Order Plan')\n",
+       "        print(list(reversed(list(self.toposort(self.convert(self.constraints))))))\n",
+       "\n",
+       "    def execute(self, display=True):\n",
+       "        """Execute the algorithm"""\n",
+       "\n",
+       "        step = 1\n",
+       "        self.tries = 1\n",
+       "        while len(self.agenda) > 0:\n",
+       "            step += 1\n",
+       "            # select <G, act1> from Agenda\n",
+       "            try:\n",
+       "                G, act1, possible_actions = self.find_open_precondition()\n",
+       "            except IndexError:\n",
+       "                print('Probably Wrong')\n",
+       "                break\n",
+       "\n",
+       "            act0 = possible_actions[0]\n",
+       "            # remove <G, act1> from Agenda\n",
+       "            self.agenda.remove((G, act1))\n",
+       "\n",
+       "            # For actions with variable number of arguments, use least commitment principle\n",
+       "            # act0_temp, bindings = self.find_action_for_precondition(G)\n",
+       "            # act0 = self.generate_action_object(act0_temp, bindings)\n",
+       "\n",
+       "            # Actions = Actions U {act0}\n",
+       "            self.actions.add(act0)\n",
+       "\n",
+       "            # Constraints = add_const(start < act0, Constraints)\n",
+       "            self.constraints = self.add_const((self.start, act0), self.constraints)\n",
+       "\n",
+       "            # for each CL E CausalLinks do\n",
+       "            #   Constraints = protect(CL, act0, Constraints)\n",
+       "            for causal_link in self.causal_links:\n",
+       "                self.constraints = self.protect(causal_link, act0, self.constraints)\n",
+       "\n",
+       "            # Agenda = Agenda U {<P, act0>: P is a precondition of act0}\n",
+       "            for precondition in act0.precond:\n",
+       "                self.agenda.add((precondition, act0))\n",
+       "\n",
+       "            # Constraints = add_const(act0 < act1, Constraints)\n",
+       "            self.constraints = self.add_const((act0, act1), self.constraints)\n",
+       "\n",
+       "            # CausalLinks U {<act0, G, act1>}\n",
+       "            if (act0, G, act1) not in self.causal_links:\n",
+       "                self.causal_links.append((act0, G, act1))\n",
+       "\n",
+       "            # for each A E Actions do\n",
+       "            #   Constraints = protect(<act0, G, act1>, A, Constraints)\n",
+       "            for action in self.actions:\n",
+       "                self.constraints = self.protect((act0, G, act1), action, self.constraints)\n",
+       "\n",
+       "            if step > 200:\n",
+       "                print('Couldn\\'t find a solution')\n",
+       "                return None, None\n",
+       "\n",
+       "        if display:\n",
+       "            self.display_plan()\n",
+       "        else:\n",
+       "            return self.constraints, self.causal_links                \n",
+       "
\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "psource(PartialOrderPlanner)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We will first describe the data-structures and helper methods used, followed by the algorithm used to find a partial-order plan." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Each plan has the following four components:\n", + "\n", + "1. **`actions`**: a set of actions that make up the steps of the plan.\n", + "`actions` is always a subset of `pddl.actions` the set of possible actions for the given planning problem. \n", + "The `start` and `finish` actions are dummy actions defined to bring uniformity to the problem. The `start` action has no preconditions and its effects constitute the initial state of the planning problem. \n", + "The `finish` action has no effects and its preconditions constitute the goal state of the planning problem.\n", + "The empty plan consists of just these two dummy actions.\n", + "2. **`constraints`**: a set of temporal constraints that define the order of performing the actions relative to each other.\n", + "`constraints` does not define a linear ordering, rather it usually represents a directed graph which is also acyclic if the plan is consistent.\n", + "Each ordering is of the form A < B, which reads as \"A before B\" and means that action A _must_ be executed sometime before action B, but not necessarily immediately before.\n", + "`constraints` stores these as a set of tuples `(Action(A), Action(B))` which is interpreted as given above.\n", + "A constraint cannot be added to `constraints` if it breaks the acyclicity of the existing graph.\n", + "3. **`causal_links`**: a set of causal-links. 
\n", + "A causal link between two actions _A_ and _B_ in the plan is written as _A_ --_p_--> _B_ and is read as \"A achieves p for B\".\n", + "This implies that _p_ is an effect of _A_ and a precondition of _B_.\n", + "It also asserts that _p_ must remain true from the time of action _A_ to the time of action _B_.\n", + "Any violation of this rule is called a threat and must be resolved immediately by adding suitable ordering constraints.\n", + "`causal_links` stores this information as tuples `(Action(A), precondition(p), Action(B))` which is interpreted as given above.\n", + "Causal-links can also be called **protection-intervals**, because the link _A_ --_p_--> _B_ protects _p_ from being negated over the interval from _A_ to _B_.\n", + "4. **`agenda`**: a set of open-preconditions.\n", + "A precondition is open if it is not achieved by some action in the plan.\n", + "Planners will work to reduce the set of open preconditions to the empty set, without introducing a contradiction.\n", + "`agenda` stores this information as tuples `(precondition(p), Action(A))` where p is a precondition of the action A.\n", + "\n", + "A **consistent plan** is a plan in which there are no cycles in the ordering constraints and no conflicts with the causal-links.\n", + "A consistent plan with no open preconditions is a **solution**.\n", + "
\n", "
\n", + "Let's briefly glance over the helper functions before going into the actual algorithm.\n", "
\n", - "The gray lines indicate mutual exclusivity.\n", - "This means that the actions connected by a gray line cannot be taken together.\n", - "Mutual exclusivity (mutex) occurs in the following cases:\n", - "1. **Inconsistent effects**: One action negates the effect of the other. For example, _Eat(Cake)_ and the persistence of _Have(Cake)_ have inconsistent effects because they disagree on the effect _Have(Cake)_\n", - "2. **Interference**: One of the effects of an action is the negation of a precondition of the other. For example, _Eat(Cake)_ interferes with the persistence of _Have(Cake)_ by negating its precondition.\n", - "3. **Competing needs**: One of the preconditions of one action is mutually exclusive with a precondition of the other. For example, _Bake(Cake)_ and _Eat(Cake)_ are mutex because they compete on the value of the _Have(Cake)_ precondition." + "**`expand_actions`**: generates all possible actions with variable bindings for use as a heuristic of selection of an open precondition.\n", + "
\n", + "**`find_open_precondition`**: finds a precondition from the agenda with the least number of actions that fulfil that precondition.\n", + "This heuristic helps form mandatory ordering constraints and causal-links to further simplify the problem and reduce the probability of encountering a threat.\n", + "
\n", + "**`find_action_for_precondition`**: finds an action that fulfils the given precondition along with the absolutely necessary variable bindings in accordance with the principle of _least commitment_.\n", + "In case of multiple possible actions, the action with the least number of effects is chosen to minimize the chances of encountering a threat.\n", + "
\n", + "**`cyclic`**: checks if a directed graph is cyclic.\n", + "
\n", + "**`add_const`**: adds `constraint` to `constraints` if the newly formed graph is acyclic and returns `constraints` otherwise.\n", + "
\n", + "**`is_a_threat`**: checks if the given `effect` negates the given `precondition`.\n", + "
\n", + "**`protect`**: checks if the given `action` poses a threat to the given `causal_link`.\n", + "If so, the threat is resolved by either promotion or demotion, whichever generates acyclic temporal constraints.\n", + "If neither promotion nor demotion works, the chosen action is not the correct fit or the planning problem cannot be solved altogether.\n", + "
\n", + "**`convert`**: converts a graph from a list of edges to an `Action` : `set` mapping, for use in topological sorting.\n", + "
\n", + "**`toposort`**: a generator function that generates a topological ordering of a given graph as a list of sets.\n", + "Each set contains an action or several actions.\n", + "If a set has more than one action in it, it means that permutations between those actions also produce a valid plan.\n", + "
\n", + "**`display_plan`**: displays the `causal_links`, `constraints` and the partial order plan generated from `toposort`.\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The **`execute`** method executes the algorithm, which is summarized below:\n", + "
\n", + "1. An open precondition is selected (a sub-goal that we want to achieve).\n", + "2. An action that fulfils the open precondition is chosen.\n", + "3. Temporal constraints are updated.\n", + "4. Existing causal links are protected. Protection is a method that checks if the causal links conflict\n", + " and if they do, temporal constraints are added to fix the threats.\n", + "5. The set of open preconditions is updated.\n", + "6. Temporal constraints of the selected action and the next action are established.\n", + "7. A new causal link is added between the selected action and the owner of the open precondition.\n", + "8. The set of new causal links is checked for threats and if found, the threat is removed by either promotion or demotion.\n", + " If promotion or demotion is unable to solve the problem, the planning problem cannot be solved with the current sequence of actions\n", + " or it may not be solvable at all.\n", + "9. These steps are repeated until the set of open preconditions is empty." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A partial-order plan can be used to generate different valid total-order plans.\n", + "This step is called **linearization** of the partial-order plan.\n", + "All possible linearizations of a partial-order plan for `socks_and_shoes` look like this.\n", + "
\n", + "![title](images/pop.jpg)\n", + "
\n", + "Linearization can be carried out in many ways, but the most efficient way is to represent the set of temporal constraints as a directed graph.\n", + "We can easily realize that the graph should also be acyclic because cycles in the constraints mean that the constraints are inconsistent.\n", + "This acyclicity is enforced by the `add_const` method, which adds a new constraint only if the acyclicity of the existing graph is not violated.\n", + "The `protect` method also checks for acyclicity of the newly-added temporal constraints to make a decision between promotion and demotion in case of a threat.\n", + "This property of a graph created from the temporal constraints of a valid partial-order plan allows us to use topological sort to order the constraints linearly.\n", + "A topological sort may produce several different valid solutions for a given directed acyclic graph." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now that we know how `PartialOrderPlanner` works, let's solve a few problems using it."
+ ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Causal Links\n", + "(Action(PutOn(Spare, Axle)), At(Spare, Axle), Action(Finish))\n", + "(Action(Start), Tire(Spare), Action(PutOn(Spare, Axle)))\n", + "(Action(Remove(Flat, Axle)), NotAt(Flat, Axle), Action(PutOn(Spare, Axle)))\n", + "(Action(Start), At(Flat, Axle), Action(Remove(Flat, Axle)))\n", + "(Action(Remove(Spare, Trunk)), At(Spare, Ground), Action(PutOn(Spare, Axle)))\n", + "(Action(Start), At(Spare, Trunk), Action(Remove(Spare, Trunk)))\n", + "(Action(Remove(Flat, Axle)), At(Flat, Ground), Action(Finish))\n", + "\n", + "Constraints\n", + "Action(Start) < Action(Finish)\n", + "Action(Start) < Action(Remove(Spare, Trunk))\n", + "Action(Remove(Flat, Axle)) < Action(PutOn(Spare, Axle))\n", + "Action(Remove(Flat, Axle)) < Action(Finish)\n", + "Action(Remove(Spare, Trunk)) < Action(PutOn(Spare, Axle))\n", + "Action(Start) < Action(PutOn(Spare, Axle))\n", + "Action(Start) < Action(Remove(Flat, Axle))\n", + "Action(PutOn(Spare, Axle)) < Action(Finish)\n", + "\n", + "Partial Order Plan\n", + "[{Action(Start)}, {Action(Remove(Flat, Axle)), Action(Remove(Spare, Trunk))}, {Action(PutOn(Spare, Axle))}, {Action(Finish)}]\n" + ] + } + ], + "source": [ + "st = spare_tire()\n", + "pop = PartialOrderPlanner(st)\n", + "pop.execute()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We observe that in the given partial order plan, Remove(Flat, Axle) and Remove(Spare, Trunk) are in the same set.\n", + "This means that the order of performing these actions does not affect the final outcome.\n", + "That aside, we also see that the PutOn(Spare, Axle) action has to be performed after both the Remove actions are complete, which seems logically consistent." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Causal Links\n", + "(Action(FromTable(B, A)), On(B, A), Action(Finish))\n", + "(Action(FromTable(C, B)), On(C, B), Action(Finish))\n", + "(Action(Start), Clear(C), Action(FromTable(C, B)))\n", + "(Action(Start), Clear(A), Action(FromTable(B, A)))\n", + "(Action(Start), OnTable(C), Action(FromTable(C, B)))\n", + "(Action(Start), OnTable(B), Action(FromTable(B, A)))\n", + "(Action(ToTable(A, B)), Clear(B), Action(FromTable(C, B)))\n", + "(Action(Start), On(A, B), Action(ToTable(A, B)))\n", + "(Action(ToTable(A, B)), Clear(B), Action(FromTable(B, A)))\n", + "(Action(Start), Clear(A), Action(ToTable(A, B)))\n", + "\n", + "Constraints\n", + "Action(Start) < Action(FromTable(B, A))\n", + "Action(Start) < Action(FromTable(C, B))\n", + "Action(Start) < Action(ToTable(A, B))\n", + "Action(ToTable(A, B)) < Action(FromTable(C, B))\n", + "Action(Start) < Action(Finish)\n", + "Action(ToTable(A, B)) < Action(FromTable(B, A))\n", + "Action(FromTable(C, B)) < Action(Finish)\n", + "Action(FromTable(B, A)) < Action(Finish)\n", + "Action(FromTable(B, A)) < Action(FromTable(C, B))\n", + "\n", + "Partial Order Plan\n", + "[{Action(Start)}, {Action(ToTable(A, B))}, {Action(FromTable(B, A))}, {Action(FromTable(C, B))}, {Action(Finish)}]\n" + ] + } + ], + "source": [ + "sbw = simple_blocks_world()\n", + "pop = PartialOrderPlanner(sbw)\n", + "pop.execute()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true + }, + "source": [ + "We see that this plan does not have flexibility in selecting actions, ie, actions should be performed in this order and this order only, to successfully reach the goal state." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Causal Links\n", + "(Action(RightShoe), RightShoeOn, Action(Finish))\n", + "(Action(LeftShoe), LeftShoeOn, Action(Finish))\n", + "(Action(LeftSock), LeftSockOn, Action(LeftShoe))\n", + "(Action(RightSock), RightSockOn, Action(RightShoe))\n", + "\n", + "Constraints\n", + "Action(Start) < Action(RightSock)\n", + "Action(Start) < Action(LeftSock)\n", + "Action(RightSock) < Action(RightShoe)\n", + "Action(RightShoe) < Action(Finish)\n", + "Action(Start) < Action(LeftShoe)\n", + "Action(LeftSock) < Action(LeftShoe)\n", + "Action(Start) < Action(RightShoe)\n", + "Action(Start) < Action(Finish)\n", + "Action(LeftShoe) < Action(Finish)\n", + "\n", + "Partial Order Plan\n", + "[{Action(Start)}, {Action(LeftSock), Action(RightSock)}, {Action(RightShoe), Action(LeftShoe)}, {Action(Finish)}]\n" + ] + } + ], + "source": [ + "ss = socks_and_shoes()\n", + "pop = PartialOrderPlanner(ss)\n", + "pop.execute()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true + }, + "source": [ + "This plan again doesn't have constraints in selecting socks or shoes.\n", + "As long as both socks are worn before both shoes, we are fine.\n", + "Notice however, there is one valid solution,\n", + "
\n", + "LeftSock -> LeftShoe -> RightSock -> RightShoe\n", + "
\n", + "that the algorithm could not find as it cannot be represented as a general partially-ordered plan but is a specific total-order solution." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Runtime differences\n", + "Let's briefly take a look at the running time of all the three algorithms on the `socks_and_shoes` problem." + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "ss = socks_and_shoes()" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "333 µs ± 8.86 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n" + ] + } + ], + "source": [ + "%%timeit\n", + "GraphPlan(ss).execute()" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1.29 ms ± 43.6 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n" + ] + } + ], + "source": [ + "%%timeit\n", + "Linearize(ss).execute()" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "425 µs ± 17 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n" + ] + } + ], + "source": [ + "%%timeit\n", + "PartialOrderPlanner(ss).execute(display=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We observe that `GraphPlan` is about 4 times faster than `Linearize` because `Linearize` essentially runs a `GraphPlan` subroutine under the hood and then carries out some transformations on the solved planning-graph.\n", + "
\n", + "We also find that `GraphPlan` is slightly faster than `PartialOrderPlanner`, but this is mainly due to the `expand_actions` method in `PartialOrderPlanner` that slows it down as it generates all possible permutations of actions and variable bindings.\n", + "
\n", + "Without heuristic functions, `PartialOrderPlanner` will be at least as fast as `GraphPlan`, if not faster, but will have a higher tendency to encounter threats and conflicts which might take additional time to resolve.\n", + "
\n", + "Different planning algorithms work differently for different problems." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "In the module, planning graphs have been implemented using two classes, `Level` which stores data for a particular level and `Graph` which connects multiple levels together.\n", - "Let's look at the `Level` class." + "## PLANNING IN THE REAL WORLD\n", + "---\n", + "## PROBLEM\n", + "The `Problem` class is a wrapper for `PlanningProblem` with some additional functionality and data-structures to handle real-world planning problems that involve time and resource constraints.\n", + "The `Problem` class includes everything that the `PlanningProblem` class includes.\n", + "Additionally, it also includes the following attributes essential to define a real-world planning problem:\n", + "- a list of `jobs` to be done\n", + "- a dictionary of `resources`\n", + "\n", + "It also overloads the `act` method to call the `do_action` method of the `HLA` class, \n", + "and also includes a new method `refinements` that finds refinements or primitive actions for high level actions.\n", + "
\n", + "`hierarchical_search` and `angelic_search` are also built into the `Problem` class to solve such planning problems." ] }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 74, "metadata": {}, "outputs": [ { @@ -2210,135 +4985,102 @@ "\n", "

\n", "\n", - "
class Level:\n",
+       "
class Problem(PlanningProblem):\n",
        "    """\n",
-       "    Contains the state of the planning problem\n",
-       "    and exhaustive list of actions which use the\n",
-       "    states as pre-condition.\n",
-       "    """\n",
-       "\n",
-       "    def __init__(self, kb):\n",
-       "        """Initializes variables to hold state and action details of a level"""\n",
-       "\n",
-       "        self.kb = kb\n",
-       "        # current state\n",
-       "        self.current_state = kb.clauses\n",
-       "        # current action to state link\n",
-       "        self.current_action_links = {}\n",
-       "        # current state to action link\n",
-       "        self.current_state_links = {}\n",
-       "        # current action to next state link\n",
-       "        self.next_action_links = {}\n",
-       "        # next state to current action link\n",
-       "        self.next_state_links = {}\n",
-       "        # mutually exclusive actions\n",
-       "        self.mutex = []\n",
-       "\n",
-       "    def __call__(self, actions, objects):\n",
-       "        self.build(actions, objects)\n",
-       "        self.find_mutex()\n",
+       "    Define real-world problems by aggregating resources as numerical quantities instead of\n",
+       "    named entities.\n",
        "\n",
-       "    def separate(self, e):\n",
-       "        """Separates an iterable of elements into positive and negative parts"""\n",
-       "\n",
-       "        positive = []\n",
-       "        negative = []\n",
-       "        for clause in e:\n",
-       "            if clause.op[:3] == 'Not':\n",
-       "                negative.append(clause)\n",
-       "            else:\n",
-       "                positive.append(clause)\n",
-       "        return positive, negative\n",
-       "\n",
-       "    def find_mutex(self):\n",
-       "        """Finds mutually exclusive actions"""\n",
-       "\n",
-       "        # Inconsistent effects\n",
-       "        pos_nsl, neg_nsl = self.separate(self.next_state_links)\n",
+       "    This class is identical to PDLL, except that it overloads the act function to handle\n",
+       "    resource and ordering conditions imposed by HLA as opposed to Action.\n",
+       "    """\n",
+       "    def __init__(self, init, goals, actions, jobs=None, resources=None):\n",
+       "        super().__init__(init, goals, actions)\n",
+       "        self.jobs = jobs\n",
+       "        self.resources = resources or {}\n",
        "\n",
-       "        for negeff in neg_nsl:\n",
-       "            new_negeff = Expr(negeff.op[3:], *negeff.args)\n",
-       "            for poseff in pos_nsl:\n",
-       "                if new_negeff == poseff:\n",
-       "                    for a in self.next_state_links[poseff]:\n",
-       "                        for b in self.next_state_links[negeff]:\n",
-       "                            if {a, b} not in self.mutex:\n",
-       "                                self.mutex.append({a, b})\n",
+       "    def act(self, action):\n",
+       "        """\n",
+       "        Performs the HLA given as argument.\n",
        "\n",
-       "        # Interference will be calculated with the last step\n",
-       "        pos_csl, neg_csl = self.separate(self.current_state_links)\n",
+       "        Note that this is different from the superclass action - where the parameter was an\n",
+       "        Expression. For real world problems, an Expr object isn't enough to capture all the\n",
+       "        detail required for executing the action - resources, preconditions, etc need to be\n",
+       "        checked for too.\n",
+       "        """\n",
+       "        args = action.args\n",
+       "        list_action = first(a for a in self.actions if a.name == action.name)\n",
+       "        if list_action is None:\n",
+       "            raise Exception("Action '{}' not found".format(action.name))\n",
+       "        self.init = list_action.do_action(self.jobs, self.resources, self.init, args).clauses\n",
        "\n",
-       "        # Competing needs\n",
-       "        for posprecond in pos_csl:\n",
-       "            for negprecond in neg_csl:\n",
-       "                new_negprecond = Expr(negprecond.op[3:], *negprecond.args)\n",
-       "                if new_negprecond == posprecond:\n",
-       "                    for a in self.current_state_links[posprecond]:\n",
-       "                        for b in self.current_state_links[negprecond]:\n",
-       "                            if {a, b} not in self.mutex:\n",
-       "                                self.mutex.append({a, b})\n",
+       "    def refinements(hla, state, library):  # TODO - refinements may be (multiple) HLA themselves ...\n",
+       "        """\n",
+       "        state is a Problem, containing the current state kb\n",
+       "        library is a dictionary containing details for every possible refinement. eg:\n",
+       "        {\n",
+       "        'HLA': ['Go(Home,SFO)', 'Go(Home,SFO)', 'Drive(Home, SFOLongTermParking)', 'Shuttle(SFOLongTermParking, SFO)', 'Taxi(Home, SFO)'],\n",
+       "        'steps': [['Drive(Home, SFOLongTermParking)', 'Shuttle(SFOLongTermParking, SFO)'], ['Taxi(Home, SFO)'], [], [], []],\n",
+       "        # empty refinements ie primitive action\n",
+       "        'precond': [['At(Home), Have(Car)'], ['At(Home)'], ['At(Home)', 'Have(Car)'], ['At(SFOLongTermParking)'], ['At(Home)']],\n",
+       "        'effect': [['At(SFO)'], ['At(SFO)'], ['At(SFOLongTermParking)'], ['At(SFO)'], ['At(SFO)'], ['~At(Home)'], ['~At(Home)'], ['~At(Home)'], ['~At(SFOLongTermParking)'], ['~At(Home)']]\n",
+       "        }\n",
+       "        """\n",
+       "        e = Expr(hla.name, hla.args)\n",
+       "        indices = [i for i, x in enumerate(library['HLA']) if expr(x).op == hla.name]\n",
+       "        for i in indices:\n",
+       "            # TODO multiple refinements\n",
+       "            precond = []\n",
+       "            for p in library['precond'][i]:\n",
+       "                if p[0] == '~':\n",
+       "                    precond.append(expr('Not' + p[1:]))\n",
+       "                else:\n",
+       "                    precond.append(expr(p))\n",
+       "            effect = []\n",
+       "            for e in library['effect'][i]:\n",
+       "                if e[0] == '~':\n",
+       "                    effect.append(expr('Not' + e[1:]))\n",
+       "                else:\n",
+       "                    effect.append(expr(e))\n",
+       "            action = HLA(library['steps'][i][0], precond, effect)\n",
+       "            if action.check_precond(state.init, action.args):\n",
+       "                yield action\n",
        "\n",
-       "        # Inconsistent support\n",
-       "        state_mutex = []\n",
-       "        for pair in self.mutex:\n",
-       "            next_state_0 = self.next_action_links[list(pair)[0]]\n",
-       "            if len(pair) == 2:\n",
-       "                next_state_1 = self.next_action_links[list(pair)[1]]\n",
+       "    def hierarchical_search(problem, hierarchy):\n",
+       "        """\n",
+       "        [Figure 11.5] 'Hierarchical Search, a Breadth First Search implementation of Hierarchical\n",
+       "        Forward Planning Search'\n",
+       "        The problem is a real-world problem defined by the problem class, and the hierarchy is\n",
+       "        a dictionary of HLA - refinements (see refinements generator for details)\n",
+       "        """\n",
+       "        act = Node(problem.actions[0])\n",
+       "        frontier = deque()\n",
+       "        frontier.append(act)\n",
+       "        while True:\n",
+       "            if not frontier:\n",
+       "                return None\n",
+       "            plan = frontier.popleft()\n",
+       "            print(plan.state.name)\n",
+       "            hla = plan.state  # first_or_null(plan)\n",
+       "            prefix = None\n",
+       "            if plan.parent:\n",
+       "                prefix = plan.parent.state.action  # prefix, suffix = subseq(plan.state, hla)\n",
+       "            outcome = Problem.result(problem, prefix)\n",
+       "            if hla is None:\n",
+       "                if outcome.goal_test():\n",
+       "                    return plan.path()\n",
        "            else:\n",
-       "                next_state_1 = self.next_action_links[list(pair)[0]]\n",
-       "            if (len(next_state_0) == 1) and (len(next_state_1) == 1):\n",
-       "                state_mutex.append({next_state_0[0], next_state_1[0]})\n",
-       "        \n",
-       "        self.mutex = self.mutex + state_mutex\n",
-       "\n",
-       "    def build(self, actions, objects):\n",
-       "        """Populates the lists and dictionaries containing the state action dependencies"""\n",
-       "\n",
-       "        for clause in self.current_state:\n",
-       "            p_expr = Expr('P' + clause.op, *clause.args)\n",
-       "            self.current_action_links[p_expr] = [clause]\n",
-       "            self.next_action_links[p_expr] = [clause]\n",
-       "            self.current_state_links[clause] = [p_expr]\n",
-       "            self.next_state_links[clause] = [p_expr]\n",
-       "\n",
-       "        for a in actions:\n",
-       "            num_args = len(a.args)\n",
-       "            possible_args = tuple(itertools.permutations(objects, num_args))\n",
-       "\n",
-       "            for arg in possible_args:\n",
-       "                if a.check_precond(self.kb, arg):\n",
-       "                    for num, symbol in enumerate(a.args):\n",
-       "                        if not symbol.op.islower():\n",
-       "                            arg = list(arg)\n",
-       "                            arg[num] = symbol\n",
-       "                            arg = tuple(arg)\n",
-       "\n",
-       "                    new_action = a.substitute(Expr(a.name, *a.args), arg)\n",
-       "                    self.current_action_links[new_action] = []\n",
-       "\n",
-       "                    for clause in a.precond:\n",
-       "                        new_clause = a.substitute(clause, arg)\n",
-       "                        self.current_action_links[new_action].append(new_clause)\n",
-       "                        if new_clause in self.current_state_links:\n",
-       "                            self.current_state_links[new_clause].append(new_action)\n",
-       "                        else:\n",
-       "                            self.current_state_links[new_clause] = [new_action]\n",
-       "                   \n",
-       "                    self.next_action_links[new_action] = []\n",
-       "                    for clause in a.effect:\n",
-       "                        new_clause = a.substitute(clause, arg)\n",
-       "\n",
-       "                        self.next_action_links[new_action].append(new_clause)\n",
-       "                        if new_clause in self.next_state_links:\n",
-       "                            self.next_state_links[new_clause].append(new_action)\n",
-       "                        else:\n",
-       "                            self.next_state_links[new_clause] = [new_action]\n",
-       "\n",
-       "    def perform_actions(self):\n",
-       "        """Performs the necessary actions and returns a new Level"""\n",
+       "                print("else")\n",
+       "                for sequence in Problem.refinements(hla, outcome, hierarchy):\n",
+       "                    print("...")\n",
+       "                    frontier.append(Node(plan.state, plan.parent, sequence))\n",
        "\n",
-       "        new_kb = FolKB(list(set(self.next_state_links.keys())))\n",
-       "        return Level(new_kb)\n",
+       "    def result(problem, action):\n",
+       "        """The outcome of applying an action to the current problem"""\n",
+       "        if action is not None:\n",
+       "            problem.act(action)\n",
+       "            return problem\n",
+       "        else:\n",
+       "            return problem\n",
        "
\n", "\n", "\n" @@ -2352,39 +5094,20 @@ } ], "source": [ - "psource(Level)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Each level stores the following data\n", - "1. The current state of the level in `current_state`\n", - "2. Links from an action to its preconditions in `current_action_links`\n", - "3. Links from a state to the possible actions in that state in `current_state_links`\n", - "4. Links from each action to its effects in `next_action_links`\n", - "5. Links from each possible next state from each action in `next_state_links`. This stores the same information as the `current_action_links` of the next level.\n", - "6. Mutex links in `mutex`.\n", - "
\n", - "
\n", - "The `find_mutex` method finds the mutex links according to the points given above.\n", - "
\n", - "The `build` method populates the data structures storing the state and action information.\n", - "Persistence actions for each clause in the current state are also defined here. \n", - "The newly created persistence action has the same name as its state, prefixed with a 'P'." + "psource(Problem)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Let's now look at the `Graph` class." + "## HLA\n", + "To be able to model a real-world planning problem properly, it is essential to be able to represent a _high-level action (HLA)_ that can be hierarchically reduced to primitive actions." ] }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 75, "metadata": {}, "outputs": [ { @@ -2476,36 +5199,85 @@ "\n", "

\n", "\n", - "
class Graph:\n",
+       "
class HLA(Action):\n",
        "    """\n",
-       "    Contains levels of state and actions\n",
-       "    Used in graph planning algorithm to extract a solution\n",
+       "    Define Actions for the real-world (that may be refined further), and satisfy resource\n",
+       "    constraints.\n",
        "    """\n",
+       "    unique_group = 1\n",
        "\n",
-       "    def __init__(self, pddl):\n",
-       "        self.pddl = pddl\n",
-       "        self.kb = FolKB(pddl.init)\n",
-       "        self.levels = [Level(self.kb)]\n",
-       "        self.objects = set(arg for clause in self.kb.clauses for arg in clause.args)\n",
-       "\n",
-       "    def __call__(self):\n",
-       "        self.expand_graph()\n",
-       "\n",
-       "    def expand_graph(self):\n",
-       "        """Expands the graph by a level"""\n",
+       "    def __init__(self, action, precond=None, effect=None, duration=0,\n",
+       "                 consume=None, use=None):\n",
+       "        """\n",
+       "        As opposed to actions, to define HLA, we have added constraints.\n",
+       "        duration holds the amount of time required to execute the task\n",
+       "        consumes holds a dictionary representing the resources the task consumes\n",
+       "        uses holds a dictionary representing the resources the task uses\n",
+       "        """\n",
+       "        precond = precond or [None]\n",
+       "        effect = effect or [None]\n",
+       "        super().__init__(action, precond, effect)\n",
+       "        self.duration = duration\n",
+       "        self.consumes = consume or {}\n",
+       "        self.uses = use or {}\n",
+       "        self.completed = False\n",
+       "        # self.priority = -1 #  must be assigned in relation to other HLAs\n",
+       "        # self.job_group = -1 #  must be assigned in relation to other HLAs\n",
        "\n",
-       "        last_level = self.levels[-1]\n",
-       "        last_level(self.pddl.actions, self.objects)\n",
-       "        self.levels.append(last_level.perform_actions())\n",
+       "    def do_action(self, job_order, available_resources, kb, args):\n",
+       "        """\n",
+       "        An HLA based version of act - along with knowledge base updation, it handles\n",
+       "        resource checks, and ensures the actions are executed in the correct order.\n",
+       "        """\n",
+       "        # print(self.name)\n",
+       "        if not self.has_usable_resource(available_resources):\n",
+       "            raise Exception('Not enough usable resources to execute {}'.format(self.name))\n",
+       "        if not self.has_consumable_resource(available_resources):\n",
+       "            raise Exception('Not enough consumable resources to execute {}'.format(self.name))\n",
+       "        if not self.inorder(job_order):\n",
+       "            raise Exception("Can't execute {} - execute prerequisite actions first".\n",
+       "                            format(self.name))\n",
+       "        kb = super().act(kb, args)  # update knowledge base\n",
+       "        for resource in self.consumes:  # remove consumed resources\n",
+       "            available_resources[resource] -= self.consumes[resource]\n",
+       "        self.completed = True  # set the task status to complete\n",
+       "        return kb\n",
        "\n",
-       "    def non_mutex_goals(self, goals, index):\n",
-       "        """Checks whether the goals are mutually exclusive"""\n",
+       "    def has_consumable_resource(self, available_resources):\n",
+       "        """\n",
+       "        Ensure there are enough consumable resources for this action to execute.\n",
+       "        """\n",
+       "        for resource in self.consumes:\n",
+       "            if available_resources.get(resource) is None:\n",
+       "                return False\n",
+       "            if available_resources[resource] < self.consumes[resource]:\n",
+       "                return False\n",
+       "        return True\n",
        "\n",
-       "        goal_perm = itertools.combinations(goals, 2)\n",
-       "        for g in goal_perm:\n",
-       "            if set(g) in self.levels[index].mutex:\n",
+       "    def has_usable_resource(self, available_resources):\n",
+       "        """\n",
+       "        Ensure there are enough usable resources for this action to execute.\n",
+       "        """\n",
+       "        for resource in self.uses:\n",
+       "            if available_resources.get(resource) is None:\n",
+       "                return False\n",
+       "            if available_resources[resource] < self.uses[resource]:\n",
        "                return False\n",
        "        return True\n",
+       "\n",
+       "    def inorder(self, job_order):\n",
+       "        """\n",
+       "        Ensure that all the jobs that had to be executed before the current one have been\n",
+       "        successfully executed.\n",
+       "        """\n",
+       "        for jobs in job_order:\n",
+       "            if self in jobs:\n",
+       "                for job in jobs:\n",
+       "                    if job is self:\n",
+       "                        return True\n",
+       "                    if not job.completed:\n",
+       "                        return False\n",
+       "        return True\n",
        "
\n", "\n", "\n" @@ -2519,31 +5291,42 @@ } ], "source": [ - "psource(Graph)" + "psource(HLA)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "The class stores a problem definition in `pddl`, \n", - "a knowledge base in `kb`, \n", - "a list of `Level` objects in `levels` and \n", - "all the possible arguments found in the initial state of the problem in `objects`.\n", - "
\n", - "The `expand_graph` method generates a new level of the graph.\n", - "This method is invoked when the goal conditions haven't been met in the current level or the actions that lead to it are mutually exclusive.\n", - "The `non_mutex_goals` method checks whether the goals in the current state are mutually exclusive.\n", - "
\n", - "
\n", - "Using these two classes, we can define a planning graph which can either be used to provide reliable heuristics for planning problems or used in the `GraphPlan` algorithm.\n", + "In addition to preconditions and effects, an object of the `HLA` class also stores:\n", + "- the `duration` of the HLA\n", + "- the quantity of consumption of _consumable_ resources\n", + "- the quantity of _reusable_ resources used\n", + "- a bool `completed` denoting if the `HLA` has been completed\n", + "\n", + "The class also has some useful helper methods:\n", + "- `do_action`: checks if required consumable and reusable resources are available and if so, executes the action.\n", + "- `has_consumable_resource`: checks if there exists sufficient quantity of the required consumable resource.\n", + "- `has_usable_resource`: checks if reusable resources are available and not already engaged.\n", + "- `inorder`: ensures that all the jobs that had to be executed before the current one have been successfully executed." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## PLANNING PROBLEMS\n", + "---\n", + "## Job-shop Problem\n", + "This is a simple problem involving the assembly of two cars simultaneously.\n", + "The problem consists of two jobs, each of the form [`AddEngine`, `AddWheels`, `Inspect`] to be performed on two cars with different requirements and availability of resources.\n", "
\n", + "Let's look at how the `job_shop_problem` has been defined in the module." ] }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 76, "metadata": {}, "outputs": [ { @@ -2630,90 +5413,54 @@ "body .vm { color: #19177C } /* Name.Variable.Magic */\n", "body .il { color: #666666 } /* Literal.Number.Integer.Long */\n", "\n", - " \n", - "\n", - "\n", - "

\n", - "\n", - "
class GraphPlan:\n",
-       "    """\n",
-       "    Class for formulation GraphPlan algorithm\n",
-       "    Constructs a graph of state and action space\n",
-       "    Returns solution for the planning problem\n",
-       "    """\n",
-       "\n",
-       "    def __init__(self, pddl):\n",
-       "        self.graph = Graph(pddl)\n",
-       "        self.nogoods = []\n",
-       "        self.solution = []\n",
-       "\n",
-       "    def check_leveloff(self):\n",
-       "        """Checks if the graph has levelled off"""\n",
-       "\n",
-       "        check = (set(self.graph.levels[-1].current_state) == set(self.graph.levels[-2].current_state))\n",
-       "\n",
-       "        if check:\n",
-       "            return True\n",
-       "\n",
-       "    def extract_solution(self, goals, index):\n",
-       "        """Extracts the solution"""\n",
-       "\n",
-       "        level = self.graph.levels[index]    \n",
-       "        if not self.graph.non_mutex_goals(goals, index):\n",
-       "            self.nogoods.append((level, goals))\n",
-       "            return\n",
-       "\n",
-       "        level = self.graph.levels[index - 1]    \n",
-       "\n",
-       "        # Create all combinations of actions that satisfy the goal    \n",
-       "        actions = []\n",
-       "        for goal in goals:\n",
-       "            actions.append(level.next_state_links[goal])    \n",
-       "\n",
-       "        all_actions = list(itertools.product(*actions))    \n",
+       "  \n",
+       "\n",
+       "\n",
+       "

\n", "\n", - " # Filter out non-mutex actions\n", - " non_mutex_actions = [] \n", - " for action_tuple in all_actions:\n", - " action_pairs = itertools.combinations(list(set(action_tuple)), 2) \n", - " non_mutex_actions.append(list(set(action_tuple))) \n", - " for pair in action_pairs: \n", - " if set(pair) in level.mutex:\n", - " non_mutex_actions.pop(-1)\n", - " break\n", - " \n", + "
def job_shop_problem():\n",
+       "    """\n",
+       "    [Figure 11.1] JOB-SHOP-PROBLEM\n",
        "\n",
-       "        # Recursion\n",
-       "        for action_list in non_mutex_actions:        \n",
-       "            if [action_list, index] not in self.solution:\n",
-       "                self.solution.append([action_list, index])\n",
+       "    A job-shop scheduling problem for assembling two cars,\n",
+       "    with resource and ordering constraints.\n",
        "\n",
-       "                new_goals = []\n",
-       "                for act in set(action_list):                \n",
-       "                    if act in level.current_action_links:\n",
-       "                        new_goals = new_goals + level.current_action_links[act]\n",
+       "    Example:\n",
+       "    >>> from planning import *\n",
+       "    >>> p = job_shop_problem()\n",
+       "    >>> p.goal_test()\n",
+       "    False\n",
+       "    >>> p.act(p.jobs[1][0])\n",
+       "    >>> p.act(p.jobs[1][1])\n",
+       "    >>> p.act(p.jobs[1][2])\n",
+       "    >>> p.act(p.jobs[0][0])\n",
+       "    >>> p.act(p.jobs[0][1])\n",
+       "    >>> p.goal_test()\n",
+       "    False\n",
+       "    >>> p.act(p.jobs[0][2])\n",
+       "    >>> p.goal_test()\n",
+       "    True\n",
+       "    >>>\n",
+       "    """\n",
+       "    resources = {'EngineHoists': 1, 'WheelStations': 2, 'Inspectors': 2, 'LugNuts': 500}\n",
        "\n",
-       "                if abs(index) + 1 == len(self.graph.levels):\n",
-       "                    return\n",
-       "                elif (level, new_goals) in self.nogoods:\n",
-       "                    return\n",
-       "                else:\n",
-       "                    self.extract_solution(new_goals, index - 1)\n",
+       "    add_engine1 = HLA('AddEngine1', precond='~Has(C1, E1)', effect='Has(C1, E1)', duration=30, use={'EngineHoists': 1})\n",
+       "    add_engine2 = HLA('AddEngine2', precond='~Has(C2, E2)', effect='Has(C2, E2)', duration=60, use={'EngineHoists': 1})\n",
+       "    add_wheels1 = HLA('AddWheels1', precond='~Has(C1, W1)', effect='Has(C1, W1)', duration=30, use={'WheelStations': 1}, consume={'LugNuts': 20})\n",
+       "    add_wheels2 = HLA('AddWheels2', precond='~Has(C2, W2)', effect='Has(C2, W2)', duration=15, use={'WheelStations': 1}, consume={'LugNuts': 20})\n",
+       "    inspect1 = HLA('Inspect1', precond='~Inspected(C1)', effect='Inspected(C1)', duration=10, use={'Inspectors': 1})\n",
+       "    inspect2 = HLA('Inspect2', precond='~Inspected(C2)', effect='Inspected(C2)', duration=10, use={'Inspectors': 1})\n",
        "\n",
-       "        # Level-Order multiple solutions\n",
-       "        solution = []\n",
-       "        for item in self.solution:\n",
-       "            if item[1] == -1:\n",
-       "                solution.append([])\n",
-       "                solution[-1].append(item[0])\n",
-       "            else:\n",
-       "                solution[-1].append(item[0])\n",
+       "    actions = [add_engine1, add_engine2, add_wheels1, add_wheels2, inspect1, inspect2]\n",
        "\n",
-       "        for num, item in enumerate(solution):\n",
-       "            item.reverse()\n",
-       "            solution[num] = item\n",
+       "    job_group1 = [add_engine1, add_wheels1, inspect1]\n",
+       "    job_group2 = [add_engine2, add_wheels2, inspect2]\n",
        "\n",
-       "        return solution\n",
+       "    return Problem(init='Car(C1) & Car(C2) & Wheels(W1) & Wheels(W2) & Engine(E2) & Engine(E2) & ~Has(C1, E1) & ~Has(C2, E2) & ~Has(C1, W1) & ~Has(C2, W2) & ~Inspected(C1) & ~Inspected(C2)',\n",
+       "                   goals='Has(C1, W1) & Has(C1, E1) & Inspected(C1) & Has(C2, W2) & Has(C2, E2) & Inspected(C2)',\n",
+       "                   actions=actions,\n",
+       "                   jobs=[job_group1, job_group2],\n",
+       "                   resources=resources)\n",
        "
\n", "\n", "\n" @@ -2727,54 +5474,157 @@ } ], "source": [ - "psource(GraphPlan)" + "psource(job_shop_problem)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Given a planning problem defined as a PDDL, `GraphPlan` creates a planning graph stored in `graph` and expands it till it reaches a state where all its required goals are present simultaneously without mutual exclusivity.\n", - "
\n", - "Once a goal is found, `extract_solution` is called.\n", - "This method recursively finds the path to a solution given a planning graph.\n", - "In the case where `extract_solution` fails to find a solution for a set of goals as a given level, we record the `(level, goals)` pair as a **no-good**.\n", - "Whenever `extract_solution` is called again with the same level and goals, we can find the recorded no-good and immediately return failure rather than searching again. \n", - "No-goods are also used in the termination test.\n", - "
\n", - "The `check_leveloff` method checks if the planning graph for the problem has **levelled-off**, ie, it has the same states, actions and mutex pairs as the previous level.\n", - "If the graph has already levelled off and we haven't found a solution, there is no point expanding the graph, as it won't lead to anything new.\n", - "In such a case, we can declare that the planning problem is unsolvable with the given constraints.\n", + "The states of this problem are:\n", "
\n", "
\n", - "To summarize, the `GraphPlan` algorithm calls `expand_graph` and tests whether it has reached the goal and if the goals are non-mutex.\n", + "**Has(x, y)**: Car **'x'** _has_ **'y'** where **'y'** can be an Engine or a Wheel.\n", + "\n", + "**~Has(x, y)**: Car **'x'** does _not have_ **'y'** where **'y'** can be an Engine or a Wheel.\n", + "\n", + "**Inspected(c)**: Car **'c'** has been _inspected_.\n", + "\n", + "**~Inspected(c)**: Car **'c'** has _not_ been inspected.\n", + "\n", + "In the initial state, `C1` and `C2` are cars; neither has an engine or wheels, and neither has been inspected.\n", + "`E1` and `E2` are engines.\n", + "`W1` and `W2` are wheels.\n", "
\n", - "If so, `extract_solution` is invoked which recursively reconstructs the solution from the planning graph.\n", + "Our goal is to have engines and wheels on both cars and to get them inspected. We will discuss how to achieve this.\n", "
\n", - "If not, then we check if our graph has levelled off and continue if it hasn't." + "Let's define an object of the `job_shop_problem`." + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "jobShopProblem = job_shop_problem()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Let's solve a few planning problems that we had defined earlier." + "Before taking any actions, we will check if `jobShopProblem` has reached its goal." + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "False\n" + ] + } + ], + "source": [ + "print(jobShopProblem.goal_test())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We now define a possible solution that can help us reach the goal. \n", + "The actions are then carried out on the `jobShopProblem` object." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The following actions are available to us:\n", + "\n", + "**AddEngine1**: Adds an engine to the car C1. Takes 30 minutes to complete and uses an engine hoist.\n", + " \n", + "**AddEngine2**: Adds an engine to the car C2. Takes 60 minutes to complete and uses an engine hoist.\n", + "\n", + "**AddWheels1**: Adds wheels to car C1. Takes 30 minutes to complete. Uses a wheel station and consumes 20 lug nuts.\n", + "\n", + "**AddWheels2**: Adds wheels to car C2. Takes 15 minutes to complete. Uses a wheel station and consumes 20 lug nuts as well.\n", + "\n", + "**Inspect1**: Gets car C1 inspected. Requires 10 minutes of inspection by one inspector.\n", + "\n", + "**Inspect2**: Gets car C2 inspected. Requires 10 minutes of inspection by one inspector." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 79, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "solution = [jobShopProblem.jobs[1][0],\n", + " jobShopProblem.jobs[1][1],\n", + " jobShopProblem.jobs[1][2],\n", + " jobShopProblem.jobs[0][0],\n", + " jobShopProblem.jobs[0][1],\n", + " jobShopProblem.jobs[0][2]]\n", + "\n", + "for action in solution:\n", + " jobShopProblem.act(action)" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "True\n" + ] + } + ], + "source": [ + "print(jobShopProblem.goal_test())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is a valid solution and one of many correct ways to solve this problem." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Air cargo problem:\n", + "## Double tennis problem\n", + "This problem is a simple case of a multiactor planning problem, where two agents act at once and can simultaneously change the current state of the problem. \n", + "A correct plan is one that, if executed by the actors, achieves the goal.\n", + "In the true multiagent setting, of course, the agents may not agree to execute any particular plan, but atleast they will know what plans _would_ work if they _did_ agree to execute them.\n", "
\n", - "In accordance with the summary above, we have defined a helper function to carry out `GraphPlan` on the `air_cargo` problem.\n", - "The function is pretty straightforward.\n", - "Let's have a look." + "In the double tennis problem, two actors A and B are playing together and can be in one of four locations: `LeftBaseLine`, `RightBaseLine`, `LeftNet` and `RightNet`.\n", + "The ball can be returned only if a player is in the right place.\n", + "Each action must include the actor as an argument.\n", + "
\n", + "Let's first look at the definition of the `double_tennis_problem` in the module." ] }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 81, "metadata": {}, "outputs": [ { @@ -2866,26 +5716,36 @@ "\n", "

\n", "\n", - "
def air_cargo_graphplan():\n",
-       "    """Solves the air cargo problem using GraphPlan"""\n",
-       "\n",
-       "    pddl = air_cargo()\n",
-       "    graphplan = GraphPlan(pddl)\n",
-       "\n",
-       "    def goal_test(kb, goals):\n",
-       "        return all(kb.ask(q) is not False for q in goals)\n",
+       "
def double_tennis_problem():\n",
+       "    """\n",
+       "    [Figure 11.10] DOUBLE-TENNIS-PROBLEM\n",
        "\n",
-       "    goals = expr('At(C1, JFK), At(C2, SFO)')\n",
+       "    A multiagent planning problem involving two partner tennis players\n",
+       "    trying to return an approaching ball and repositioning around in the court.\n",
        "\n",
-       "    while True:\n",
-       "        if (goal_test(graphplan.graph.levels[-1].kb, goals) and graphplan.graph.non_mutex_goals(goals, -1)):\n",
-       "            solution = graphplan.extract_solution(goals, -1)\n",
-       "            if solution:\n",
-       "                return solution\n",
+       "    Example:\n",
+       "    >>> from planning import *\n",
+       "    >>> dtp = double_tennis_problem()\n",
+       "    >>> goal_test(dtp.goals, dtp.init)\n",
+       "    False\n",
+       "    >>> dtp.act(expr('Go(A, RightBaseLine, LeftBaseLine)'))\n",
+       "    >>> dtp.act(expr('Hit(A, Ball, RightBaseLine)'))\n",
+       "    >>> goal_test(dtp.goals, dtp.init)\n",
+       "    False\n",
+       "    >>> dtp.act(expr('Go(A, LeftNet, RightBaseLine)'))\n",
+       "    >>> goal_test(dtp.goals, dtp.init)\n",
+       "    True\n",
+       "    >>>\n",
+       "    """\n",
        "\n",
-       "        graphplan.graph.expand_graph()\n",
-       "        if len(graphplan.graph.levels) >= 2 and graphplan.check_leveloff():\n",
-       "            return None\n",
+       "    return PlanningProblem(init='At(A, LeftBaseLine) & At(B, RightNet) & Approaching(Ball, RightBaseLine) & Partner(A, B) & Partner(B, A)',\n",
+       "                             goals='Returned(Ball) & At(x, LeftNet) & At(y, RightNet)',\n",
+       "                             actions=[Action('Hit(actor, Ball, loc)',\n",
+       "                                             precond='Approaching(Ball, loc) & At(actor, loc)',\n",
+       "                                             effect='Returned(Ball)'),\n",
+       "                                      Action('Go(actor, to, loc)', \n",
+       "                                             precond='At(actor, loc)',\n",
+       "                                             effect='At(actor, to) & ~At(actor, loc)')])\n",
        "
\n", "\n", "\n" @@ -2899,169 +5759,128 @@ } ], "source": [ - "psource(air_cargo_graphplan)" + "psource(double_tennis_problem)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Let's instantiate the problem and find a solution using this helper function." + "The states of this problem are:\n", + "\n", + "**Approaching(Ball, loc)**: The `Ball` is approaching the location `loc`.\n", + "\n", + "**Returned(Ball)**: One of the actors successfully hit the approaching ball from the correct location which caused it to return to the other side.\n", + "\n", + "**At(actor, loc)**: `actor` is at location `loc`.\n", + "\n", + "**~At(actor, loc)**: `actor` is _not_ at location `loc`.\n", + "\n", + "Let's now define an object of `double_tennis_problem`.\n" ] }, { "cell_type": "code", - "execution_count": 44, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[[[PCargo(C2),\n", - " Load(C2, P2, JFK),\n", - " PPlane(P2),\n", - " Load(C1, P1, SFO),\n", - " Fly(P1, SFO, JFK),\n", - " PAirport(SFO),\n", - " PAirport(JFK),\n", - " PPlane(P1),\n", - " PCargo(C1),\n", - " Fly(P2, JFK, SFO)],\n", - " [Unload(C2, P2, SFO), Unload(C1, P1, JFK)]]]" - ] - }, - "execution_count": 44, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": 82, + "metadata": { + "collapsed": true + }, + "outputs": [], "source": [ - "air_cargo = air_cargo_graphplan()\n", - "air_cargo" + "doubleTennisProblem = double_tennis_problem()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Each element in the solution is a valid action.\n", - "The solution is separated into lists for each level.\n", - "The actions prefixed with a 'P' are persistence actions and can be ignored.\n", - "They simply carry certain states forward.\n", - "We have another helper function `linearize` that presents the solution in a more readable format, much like a total-order planner." 
+ "Before taking any actions, we will check if `doubleTennisProblem` has reached the goal." ] }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 83, "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "[Load(C2, P2, JFK),\n", - " Load(C1, P1, SFO),\n", - " Fly(P1, SFO, JFK),\n", - " Fly(P2, JFK, SFO),\n", - " Unload(C2, P2, SFO),\n", - " Unload(C1, P1, JFK)]" - ] - }, - "execution_count": 45, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "False\n" + ] } ], "source": [ - "linearize(air_cargo)" + "print(goal_test(doubleTennisProblem.goals, doubleTennisProblem.init))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Indeed, this is a correct solution.\n", - "
\n", - "There are similar helper functions for some other planning problems.\n", - "
\n", - "Lets' try solving the spare tire problem." + "As we can see, the goal hasn't been reached. \n", + "We now define a possible solution that can help us reach the goal of having the ball returned.\n", + "The actions will then be carried out on the `doubleTennisProblem` object." ] }, { - "cell_type": "code", - "execution_count": 46, + "cell_type": "markdown", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[Remove(Flat, Axle), Remove(Spare, Trunk), PutOn(Spare, Axle)]" - ] - }, - "execution_count": 46, - "metadata": {}, - "output_type": "execute_result" - } - ], "source": [ - "spare_tire = spare_tire_graphplan()\n", - "linearize(spare_tire)" + "The actions available to us are the following:\n", + "\n", + "**Hit(actor, ball, loc)**: returns an approaching ball if `actor` is present at the `loc` that the ball is approaching.\n", + "\n", + "**Go(actor, to, loc)**: moves an `actor` from location `loc` to location `to`.\n", + "\n", + "We notice something different in this problem though, \n", + "which is quite unlike any other problem we have seen so far. \n", + "The goal state of the problem contains a variable `a`.\n", + "This happens sometimes in multiagent planning problems \n", + "and it means that it doesn't matter _which_ actor is at the `LeftNet` or the `RightNet`, as long as there is atleast one actor at either `LeftNet` or `RightNet`." 
] }, { - "cell_type": "markdown", - "metadata": {}, + "cell_type": "code", + "execution_count": 84, + "metadata": { + "collapsed": true + }, + "outputs": [], "source": [ - "Solution for the cake problem" + "solution = [expr('Go(A, RightBaseLine, LeftBaseLine)'),\n", + " expr('Hit(A, Ball, RightBaseLine)'),\n", + " expr('Go(A, LeftNet, RightBaseLine)')]\n", + "\n", + "for action in solution:\n", + " doubleTennisProblem.act(action)" ] }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 85, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[Eat(Cake), Bake(Cake)]" + "True" ] }, - "execution_count": 47, + "execution_count": 85, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "cake_problem = have_cake_and_eat_cake_too_graphplan()\n", - "linearize(cake_problem)" + "goal_test(doubleTennisProblem.goals, doubleTennisProblem.init)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Solution for the Sussman's Anomaly configuration of three blocks." - ] - }, - { - "cell_type": "code", - "execution_count": 48, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[MoveToTable(C, A), Move(B, Table, C), Move(A, Table, B)]" - ] - }, - "execution_count": 48, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "sussman_anomaly = three_block_tower_graphplan()\n", - "linearize(sussman_anomaly)" + "It has now successfully reached its goal, ie, to return the approaching ball." ] } ], diff --git a/planning.py b/planning.py index b5e35dae4..9492e2c8b 100644 --- a/planning.py +++ b/planning.py @@ -5,13 +5,14 @@ import itertools from search import Node from utils import Expr, expr, first -from logic import FolKB, conjuncts +from logic import FolKB, conjuncts, unify from collections import deque +from functools import reduce as _reduce -class PDDL: +class PlanningProblem: """ - Planning Domain Definition Language (PDDL) used to define a search problem. 
+ Planning Domain Definition Language (PlanningProblem) used to define a search problem. It stores states in a knowledge base consisting of first order logic statements. The conjunction of these logical statements completely defines a state. """ @@ -63,7 +64,7 @@ def act(self, action): class Action: """ Defines an action schema using preconditions and effects. - Use this to describe actions in PDDL. + Use this to describe actions in PlanningProblem. action is an Expr where variables are given as arguments(args). Precondition and effect are both lists with positive and negative literals. Negative preconditions and effects are defined by adding a 'Not' before the name of the clause @@ -84,6 +85,9 @@ def __init__(self, action, precond, effect): def __call__(self, kb, args): return self.act(kb, args) + def __repr__(self): + return '{}({})'.format(self.__class__.__name__, Expr(self.name, *self.args)) + def convert(self, clauses): """Converts strings into Exprs""" if isinstance(clauses, Expr): @@ -148,11 +152,43 @@ def act(self, kb, args): return kb +def goal_test(goals, state): + """Generic goal testing helper function""" + + if isinstance(state, list): + kb = FolKB(state) + else: + kb = state + return all(kb.ask(q) is not False for q in goals) + + def air_cargo(): - """Air cargo problem""" + """ + [Figure 10.1] AIR-CARGO-PROBLEM + + An air-cargo shipment problem for delivering cargo to different locations, + given the starting location and airplanes. 
+ + Example: + >>> from planning import * + >>> ac = air_cargo() + >>> ac.goal_test() + False + >>> ac.act(expr('Load(C2, P2, JFK)')) + >>> ac.act(expr('Load(C1, P1, SFO)')) + >>> ac.act(expr('Fly(P1, SFO, JFK)')) + >>> ac.act(expr('Fly(P2, JFK, SFO)')) + >>> ac.act(expr('Unload(C2, P2, SFO)')) + >>> ac.goal_test() + False + >>> ac.act(expr('Unload(C1, P1, JFK)')) + >>> ac.goal_test() + True + >>> + """ - return PDDL(init='At(C1, SFO) & At(C2, JFK) & At(P1, SFO) & At(P2, JFK) & Cargo(C1) & Cargo(C2) & Plane(P1) & Plane(P2) & Airport(SFO) & Airport(JFK)', - goals='At(C1, JFK) & At(C2, SFO)', + return PlanningProblem(init='At(C1, SFO) & At(C2, JFK) & At(P1, SFO) & At(P2, JFK) & Cargo(C1) & Cargo(C2) & Plane(P1) & Plane(P2) & Airport(SFO) & Airport(JFK)', + goals='At(C1, JFK) & At(C2, SFO)', actions=[Action('Load(c, p, a)', precond='At(c, a) & At(p, a) & Cargo(c) & Plane(p) & Airport(a)', effect='In(c, p) & ~At(c, a)'), @@ -165,9 +201,27 @@ def air_cargo(): def spare_tire(): - """Spare tire problem""" + """[Figure 10.2] SPARE-TIRE-PROBLEM + + A problem involving changing the flat tire of a car + with a spare tire from the trunk. + + Example: + >>> from planning import * + >>> st = spare_tire() + >>> st.goal_test() + False + >>> st.act(expr('Remove(Spare, Trunk)')) + >>> st.act(expr('Remove(Flat, Axle)')) + >>> st.goal_test() + False + >>> st.act(expr('PutOn(Spare, Axle)')) + >>> st.goal_test() + True + >>> + """ - return PDDL(init='Tire(Flat) & Tire(Spare) & At(Flat, Axle) & At(Spare, Trunk)', + return PlanningProblem(init='Tire(Flat) & Tire(Spare) & At(Flat, Axle) & At(Spare, Trunk)', goals='At(Spare, Axle) & At(Flat, Ground)', actions=[Action('Remove(obj, loc)', precond='At(obj, loc)', @@ -182,9 +236,28 @@ def spare_tire(): def three_block_tower(): - """Sussman Anomaly problem""" + """ + [Figure 10.3] THREE-BLOCK-TOWER + + A blocks-world problem of stacking three blocks in a certain configuration, + also known as the Sussman Anomaly. 
+ + Example: + >>> from planning import * + >>> tbt = three_block_tower() + >>> tbt.goal_test() + False + >>> tbt.act(expr('MoveToTable(C, A)')) + >>> tbt.act(expr('Move(B, Table, C)')) + >>> tbt.goal_test() + False + >>> tbt.act(expr('Move(A, Table, B)')) + >>> tbt.goal_test() + True + >>> + """ - return PDDL(init='On(A, Table) & On(B, Table) & On(C, A) & Block(A) & Block(B) & Block(C) & Clear(B) & Clear(C)', + return PlanningProblem(init='On(A, Table) & On(B, Table) & On(C, A) & Block(A) & Block(B) & Block(C) & Clear(B) & Clear(C)', goals='On(A, B) & On(B, C)', actions=[Action('Move(b, x, y)', precond='On(b, x) & Clear(b) & Clear(y) & Block(b) & Block(y)', @@ -194,10 +267,60 @@ def three_block_tower(): effect='On(b, Table) & Clear(x) & ~On(b, x)')]) +def simple_blocks_world(): + """ + SIMPLE-BLOCKS-WORLD + + A simplified definition of the Sussman Anomaly problem. + + Example: + >>> from planning import * + >>> sbw = simple_blocks_world() + >>> sbw.goal_test() + False + >>> sbw.act(expr('ToTable(A, B)')) + >>> sbw.act(expr('FromTable(B, A)')) + >>> sbw.goal_test() + False + >>> sbw.act(expr('FromTable(C, B)')) + >>> sbw.goal_test() + True + >>> + """ + + return PlanningProblem(init='On(A, B) & Clear(A) & OnTable(B) & OnTable(C) & Clear(C)', + goals='On(B, A) & On(C, B)', + actions=[Action('ToTable(x, y)', + precond='On(x, y) & Clear(x)', + effect='~On(x, y) & Clear(y) & OnTable(x)'), + Action('FromTable(y, x)', + precond='OnTable(y) & Clear(y) & Clear(x)', + effect='~OnTable(y) & ~Clear(x) & On(y, x)')]) + + def have_cake_and_eat_cake_too(): - """Cake problem""" + """ + [Figure 10.7] CAKE-PROBLEM + + A problem where we begin with a cake and want to + reach the state of having a cake and having eaten a cake. + The possible actions include baking a cake and eating a cake. 
+ + Example: + >>> from planning import * + >>> cp = have_cake_and_eat_cake_too() + >>> cp.goal_test() + False + >>> cp.act(expr('Eat(Cake)')) + >>> cp.goal_test() + False + >>> cp.act(expr('Bake(Cake)')) + >>> cp.goal_test() + True + >>> + """ - return PDDL(init='Have(Cake)', + return PlanningProblem(init='Have(Cake)', goals='Have(Cake) & Eaten(Cake)', actions=[Action('Eat(Cake)', precond='Have(Cake)', @@ -208,9 +331,29 @@ def have_cake_and_eat_cake_too(): def shopping_problem(): - """Shopping problem""" + """ + SHOPPING-PROBLEM + + A problem of acquiring some items given their availability at certain stores. + + Example: + >>> from planning import * + >>> sp = shopping_problem() + >>> sp.goal_test() + False + >>> sp.act(expr('Go(Home, HW)')) + >>> sp.act(expr('Buy(Drill, HW)')) + >>> sp.act(expr('Go(HW, SM)')) + >>> sp.act(expr('Buy(Banana, SM)')) + >>> sp.goal_test() + False + >>> sp.act(expr('Buy(Milk, SM)')) + >>> sp.goal_test() + True + >>> + """ - return PDDL(init='At(Home) & Sells(SM, Milk) & Sells(SM, Banana) & Sells(HW, Drill)', + return PlanningProblem(init='At(Home) & Sells(SM, Milk) & Sells(SM, Banana) & Sells(HW, Drill)', goals='Have(Milk) & Have(Banana) & Have(Drill)', actions=[Action('Buy(x, store)', precond='At(store) & Sells(store, x)', @@ -221,9 +364,28 @@ def shopping_problem(): def socks_and_shoes(): - """Socks and shoes problem""" + """ + SOCKS-AND-SHOES-PROBLEM + + A task of wearing socks and shoes on both feet + + Example: + >>> from planning import * + >>> ss = socks_and_shoes() + >>> ss.goal_test() + False + >>> ss.act(expr('RightSock')) + >>> ss.act(expr('RightShoe')) + >>> ss.act(expr('LeftSock')) + >>> ss.goal_test() + False + >>> ss.act(expr('LeftShoe')) + >>> ss.goal_test() + True + >>> + """ - return PDDL(init='', + return PlanningProblem(init='', goals='RightShoeOn & LeftShoeOn', actions=[Action('RightShoe', precond='RightSockOn', @@ -239,12 +401,32 @@ def socks_and_shoes(): effect='LeftSockOn')]) -# Doubles tennis problem def 
double_tennis_problem(): - return PDDL(init='At(A, LeftBaseLine) & At(B, RightNet) & Approaching(Ball, RightBaseLine) & Partner(A, B) & Partner(B, A)', + """ + [Figure 11.10] DOUBLE-TENNIS-PROBLEM + + A multiagent planning problem involving two partner tennis players + trying to return an approaching ball and repositioning around in the court. + + Example: + >>> from planning import * + >>> dtp = double_tennis_problem() + >>> goal_test(dtp.goals, dtp.init) + False + >>> dtp.act(expr('Go(A, RightBaseLine, LeftBaseLine)')) + >>> dtp.act(expr('Hit(A, Ball, RightBaseLine)')) + >>> goal_test(dtp.goals, dtp.init) + False + >>> dtp.act(expr('Go(A, LeftNet, RightBaseLine)')) + >>> goal_test(dtp.goals, dtp.init) + True + >>> + """ + + return PlanningProblem(init='At(A, LeftBaseLine) & At(B, RightNet) & Approaching(Ball, RightBaseLine) & Partner(A, B) & Partner(B, A)', goals='Returned(Ball) & At(a, LeftNet) & At(a, RightNet)', actions=[Action('Hit(actor, Ball, loc)', - precond='Approaching(Ball,loc) & At(actor,loc)', + precond='Approaching(Ball, loc) & At(actor, loc)', effect='Returned(Ball)'), Action('Go(actor, to, loc)', precond='At(actor, loc)', @@ -388,9 +570,9 @@ class Graph: Used in graph planning algorithm to extract a solution """ - def __init__(self, pddl): - self.pddl = pddl - self.kb = FolKB(pddl.init) + def __init__(self, planningproblem): + self.planningproblem = planningproblem + self.kb = FolKB(planningproblem.init) self.levels = [Level(self.kb)] self.objects = set(arg for clause in self.kb.clauses for arg in clause.args) @@ -401,7 +583,7 @@ def expand_graph(self): """Expands the graph by a level""" last_level = self.levels[-1] - last_level(self.pddl.actions, self.objects) + last_level(self.planningproblem.actions, self.objects) self.levels.append(last_level.perform_actions()) def non_mutex_goals(self, goals, index): @@ -421,8 +603,8 @@ class GraphPlan: Returns solution for the planning problem """ - def __init__(self, pddl): - self.graph = Graph(pddl) + def 
__init__(self, planningproblem): + self.graph = Graph(planningproblem) self.nogoods = [] self.solution = [] @@ -495,15 +677,15 @@ def extract_solution(self, goals, index): return solution def goal_test(self, kb): - return all(kb.ask(q) is not False for q in self.graph.pddl.goals) + return all(kb.ask(q) is not False for q in self.graph.planningproblem.goals) def execute(self): """Executes the GraphPlan algorithm for the given problem""" while True: self.graph.expand_graph() - if (self.goal_test(self.graph.levels[-1].kb) and self.graph.non_mutex_goals(self.graph.pddl.goals, -1)): - solution = self.extract_solution(self.graph.pddl.goals, -1) + if (self.goal_test(self.graph.levels[-1].kb) and self.graph.non_mutex_goals(self.graph.planningproblem.goals, -1)): + solution = self.extract_solution(self.graph.planningproblem.goals, -1) if solution: return solution @@ -511,10 +693,10 @@ def execute(self): return None -class TotalOrderPlanner: +class Linearize: - def __init__(self, pddl): - self.pddl = pddl + def __init__(self, planningproblem): + self.planningproblem = planningproblem def filter(self, solution): """Filter out persistence actions from a solution""" @@ -528,11 +710,11 @@ def filter(self, solution): new_solution.append(new_section) return new_solution - def orderlevel(self, level, pddl): + def orderlevel(self, level, planningproblem): """Return valid linear order of actions for a given level""" for permutation in itertools.permutations(level): - temp = copy.deepcopy(pddl) + temp = copy.deepcopy(planningproblem) count = 0 for action in permutation: try: @@ -540,7 +722,7 @@ def orderlevel(self, level, pddl): count += 1 except: count = 0 - temp = copy.deepcopy(pddl) + temp = copy.deepcopy(planningproblem) break if count == len(permutation): return list(permutation), temp @@ -549,12 +731,12 @@ def orderlevel(self, level, pddl): def execute(self): """Finds total-order solution for a planning graph""" - graphplan_solution = GraphPlan(self.pddl).execute() + 
graphplan_solution = GraphPlan(self.planningproblem).execute() filtered_solution = self.filter(graphplan_solution) ordered_solution = [] - pddl = self.pddl + planningproblem = self.planningproblem for level in filtered_solution: - level_solution, pddl = self.orderlevel(level, pddl) + level_solution, planningproblem = self.orderlevel(level, planningproblem) for element in level_solution: ordered_solution.append(element) @@ -573,6 +755,366 @@ def linearize(solution): return linear_solution +''' +[Section 10.13] PARTIAL-ORDER-PLANNER + +Partially ordered plans are created by a search through the space of plans +rather than a search through the state space. It views planning as a refinement of partially ordered plans. +A partially ordered plan is defined by a set of actions and a set of constraints of the form A < B, +which denotes that action A has to be performed before action B. +To summarize the working of a partial order planner, +1. An open precondition is selected (a sub-goal that we want to achieve). +2. An action that fulfils the open precondition is chosen. +3. Temporal constraints are updated. +4. Existing causal links are protected. Protection is a method that checks if the causal links conflict + and if they do, temporal constraints are added to fix the threats. +5. The set of open preconditions is updated. +6. Temporal constraints of the selected action and the next action are established. +7. A new causal link is added between the selected action and the owner of the open precondition. +8. The set of new causal links is checked for threats and if found, the threat is removed by either promotion or demotion. + If promotion or demotion is unable to solve the problem, the planning problem cannot be solved with the current sequence of actions + or it may not be solvable at all. +9. These steps are repeated until the set of open preconditions is empty. 
+''' + +class PartialOrderPlanner: + + def __init__(self, planningproblem): + self.planningproblem = planningproblem + self.initialize() + + def initialize(self): + """Initialize all variables""" + self.causal_links = [] + self.start = Action('Start', [], self.planningproblem.init) + self.finish = Action('Finish', self.planningproblem.goals, []) + self.actions = set() + self.actions.add(self.start) + self.actions.add(self.finish) + self.constraints = set() + self.constraints.add((self.start, self.finish)) + self.agenda = set() + for precond in self.finish.precond: + self.agenda.add((precond, self.finish)) + self.expanded_actions = self.expand_actions() + + def expand_actions(self, name=None): + """Generate all possible actions with variable bindings for precondition selection heuristic""" + + objects = set(arg for clause in self.planningproblem.init for arg in clause.args) + expansions = [] + action_list = [] + if name is not None: + for action in self.planningproblem.actions: + if str(action.name) == name: + action_list.append(action) + else: + action_list = self.planningproblem.actions + + for action in action_list: + for permutation in itertools.permutations(objects, len(action.args)): + bindings = unify(Expr(action.name, *action.args), Expr(action.name, *permutation)) + if bindings is not None: + new_args = [] + for arg in action.args: + if arg in bindings: + new_args.append(bindings[arg]) + else: + new_args.append(arg) + new_expr = Expr(str(action.name), *new_args) + new_preconds = [] + for precond in action.precond: + new_precond_args = [] + for arg in precond.args: + if arg in bindings: + new_precond_args.append(bindings[arg]) + else: + new_precond_args.append(arg) + new_precond = Expr(str(precond.op), *new_precond_args) + new_preconds.append(new_precond) + new_effects = [] + for effect in action.effect: + new_effect_args = [] + for arg in effect.args: + if arg in bindings: + new_effect_args.append(bindings[arg]) + else: + new_effect_args.append(arg) + 
new_effect = Expr(str(effect.op), *new_effect_args) + new_effects.append(new_effect) + expansions.append(Action(new_expr, new_preconds, new_effects)) + + return expansions + + def find_open_precondition(self): + """Find open precondition with the least number of possible actions""" + + number_of_ways = dict() + actions_for_precondition = dict() + for element in self.agenda: + open_precondition = element[0] + possible_actions = list(self.actions) + self.expanded_actions + for action in possible_actions: + for effect in action.effect: + if effect == open_precondition: + if open_precondition in number_of_ways: + number_of_ways[open_precondition] += 1 + actions_for_precondition[open_precondition].append(action) + else: + number_of_ways[open_precondition] = 1 + actions_for_precondition[open_precondition] = [action] + + number = sorted(number_of_ways, key=number_of_ways.__getitem__) + + for k, v in number_of_ways.items(): + if v == 0: + return None, None, None + + act1 = None + for element in self.agenda: + if element[0] == number[0]: + act1 = element[1] + break + + if number[0] in self.expanded_actions: + self.expanded_actions.remove(number[0]) + + return number[0], act1, actions_for_precondition[number[0]] + + def find_action_for_precondition(self, oprec): + """Find action for a given precondition""" + + # either + # choose act0 E Actions such that act0 achieves G + for action in self.actions: + for effect in action.effect: + if effect == oprec: + return action, 0 + + # or + # choose act0 E Actions such that act0 achieves G + for action in self.planningproblem.actions: + for effect in action.effect: + if effect.op == oprec.op: + bindings = unify(effect, oprec) + if bindings is None: + break + return action, bindings + + def generate_expr(self, clause, bindings): + """Generate atomic expression from generic expression given variable bindings""" + + new_args = [] + for arg in clause.args: + if arg in bindings: + new_args.append(bindings[arg]) + else: + 
new_args.append(arg) + + try: + return Expr(str(clause.name), *new_args) + except: + return Expr(str(clause.op), *new_args) + + def generate_action_object(self, action, bindings): + """Generate action object given a generic action andvariable bindings""" + + # if bindings is 0, it means the action already exists in self.actions + if bindings == 0: + return action + + # bindings cannot be None + else: + new_expr = self.generate_expr(action, bindings) + new_preconds = [] + for precond in action.precond: + new_precond = self.generate_expr(precond, bindings) + new_preconds.append(new_precond) + new_effects = [] + for effect in action.effect: + new_effect = self.generate_expr(effect, bindings) + new_effects.append(new_effect) + return Action(new_expr, new_preconds, new_effects) + + def cyclic(self, graph): + """Check cyclicity of a directed graph""" + + new_graph = dict() + for element in graph: + if element[0] in new_graph: + new_graph[element[0]].append(element[1]) + else: + new_graph[element[0]] = [element[1]] + + path = set() + + def visit(vertex): + path.add(vertex) + for neighbor in new_graph.get(vertex, ()): + if neighbor in path or visit(neighbor): + return True + path.remove(vertex) + return False + + value = any(visit(v) for v in new_graph) + return value + + def add_const(self, constraint, constraints): + """Add the constraint to constraints if the resulting graph is acyclic""" + + if constraint[0] == self.finish or constraint[1] == self.start: + return constraints + + new_constraints = set(constraints) + new_constraints.add(constraint) + + if self.cyclic(new_constraints): + return constraints + return new_constraints + + def is_a_threat(self, precondition, effect): + """Check if effect is a threat to precondition""" + + if (str(effect.op) == 'Not' + str(precondition.op)) or ('Not' + str(effect.op) == str(precondition.op)): + if effect.args == precondition.args: + return True + return False + + def protect(self, causal_link, action, constraints): + """Check 
and resolve threats by promotion or demotion""" + + threat = False + for effect in action.effect: + if self.is_a_threat(causal_link[1], effect): + threat = True + break + + if action != causal_link[0] and action != causal_link[2] and threat: + # try promotion + new_constraints = set(constraints) + new_constraints.add((action, causal_link[0])) + if not self.cyclic(new_constraints): + constraints = self.add_const((action, causal_link[0]), constraints) + else: + # try demotion + new_constraints = set(constraints) + new_constraints.add((causal_link[2], action)) + if not self.cyclic(new_constraints): + constraints = self.add_const((causal_link[2], action), constraints) + else: + # both promotion and demotion fail + print('Unable to resolve a threat caused by', action, 'onto', causal_link) + return + return constraints + + def convert(self, constraints): + """Convert constraints into a dict of Action to set orderings""" + + graph = dict() + for constraint in constraints: + if constraint[0] in graph: + graph[constraint[0]].add(constraint[1]) + else: + graph[constraint[0]] = set() + graph[constraint[0]].add(constraint[1]) + return graph + + def toposort(self, graph): + """Generate topological ordering of constraints""" + + if len(graph) == 0: + return + + graph = graph.copy() + + for k, v in graph.items(): + v.discard(k) + + extra_elements_in_dependencies = _reduce(set.union, graph.values()) - set(graph.keys()) + + graph.update({element:set() for element in extra_elements_in_dependencies}) + while True: + ordered = set(element for element, dependency in graph.items() if len(dependency) == 0) + if not ordered: + break + yield ordered + graph = {element: (dependency - ordered) for element, dependency in graph.items() if element not in ordered} + if len(graph) != 0: + raise ValueError('The graph is not acyclic and cannot be linearly ordered') + + def display_plan(self): + """Display causal links, constraints and the plan""" + + print('Causal Links') + for causal_link in 
self.causal_links: + print(causal_link) + + print('\nConstraints') + for constraint in self.constraints: + print(constraint[0], '<', constraint[1]) + + print('\nPartial Order Plan') + print(list(reversed(list(self.toposort(self.convert(self.constraints)))))) + + def execute(self, display=True): + """Execute the algorithm""" + + step = 1 + self.tries = 1 + while len(self.agenda) > 0: + step += 1 + # select from Agenda + try: + G, act1, possible_actions = self.find_open_precondition() + except IndexError: + print('Probably Wrong') + break + + act0 = possible_actions[0] + # remove from Agenda + self.agenda.remove((G, act1)) + + # For actions with variable number of arguments, use least commitment principle + # act0_temp, bindings = self.find_action_for_precondition(G) + # act0 = self.generate_action_object(act0_temp, bindings) + + # Actions = Actions U {act0} + self.actions.add(act0) + + # Constraints = add_const(start < act0, Constraints) + self.constraints = self.add_const((self.start, act0), self.constraints) + + # for each CL E CausalLinks do + # Constraints = protect(CL, act0, Constraints) + for causal_link in self.causal_links: + self.constraints = self.protect(causal_link, act0, self.constraints) + + # Agenda = Agenda U {: P is a precondition of act0} + for precondition in act0.precond: + self.agenda.add((precondition, act0)) + + # Constraints = add_const(act0 < act1, Constraints) + self.constraints = self.add_const((act0, act1), self.constraints) + + # CausalLinks U {} + if (act0, G, act1) not in self.causal_links: + self.causal_links.append((act0, G, act1)) + + # for each A E Actions do + # Constraints = protect(, A, Constraints) + for action in self.actions: + self.constraints = self.protect((act0, G, act1), action, self.constraints) + + if step > 200: + print('Couldn\'t find a solution') + return None, None + + if display: + self.display_plan() + else: + return self.constraints, self.causal_links + + def spare_tire_graphplan(): """Solves the spare tire 
problem using GraphPlan""" return GraphPlan(spare_tire()).execute() @@ -597,6 +1139,10 @@ def socks_and_shoes_graphplan(): """Solves the socks and shoes problem using GraphpPlan""" return GraphPlan(socks_and_shoes()).execute() +def simple_blocks_world_graphplan(): + """Solves the simple blocks world problem""" + return GraphPlan(simple_blocks_world()).execute() + class HLA(Action): """ @@ -679,7 +1225,7 @@ def inorder(self, job_order): return True -class Problem(PDDL): +class Problem(PlanningProblem): """ Define real-world problems by aggregating resources as numerical quantities instead of named entities. @@ -712,11 +1258,35 @@ def refinements(hla, state, library): # TODO - refinements may be (multiple) HL state is a Problem, containing the current state kb library is a dictionary containing details for every possible refinement. eg: { - 'HLA': ['Go(Home,SFO)', 'Go(Home,SFO)', 'Drive(Home, SFOLongTermParking)', 'Shuttle(SFOLongTermParking, SFO)', 'Taxi(Home, SFO)'], - 'steps': [['Drive(Home, SFOLongTermParking)', 'Shuttle(SFOLongTermParking, SFO)'], ['Taxi(Home, SFO)'], [], [], []], - # empty refinements ie primitive action - 'precond': [['At(Home), Have(Car)'], ['At(Home)'], ['At(Home)', 'Have(Car)'], ['At(SFOLongTermParking)'], ['At(Home)']], - 'effect': [['At(SFO)'], ['At(SFO)'], ['At(SFOLongTermParking)'], ['At(SFO)'], ['At(SFO)'], ['~At(Home)'], ['~At(Home)'], ['~At(Home)'], ['~At(SFOLongTermParking)'], ['~At(Home)']] + 'HLA': [ + 'Go(Home, SFO)', + 'Go(Home, SFO)', + 'Drive(Home, SFOLongTermParking)', + 'Shuttle(SFOLongTermParking, SFO)', + 'Taxi(Home, SFO)' + ], + 'steps': [ + ['Drive(Home, SFOLongTermParking)', 'Shuttle(SFOLongTermParking, SFO)'], + ['Taxi(Home, SFO)'], + [], + [], + [] + ], + # empty refinements indicate a primitive action + 'precond': [ + ['At(Home)', 'Have(Car)'], + ['At(Home)'], + ['At(Home)', 'Have(Car)'], + ['At(SFOLongTermParking)'], + ['At(Home)'] + ], + 'effect': [ + ['At(SFO)', '~At(Home)'], + ['At(SFO)', '~At(Home)'], + 
['At(SFOLongTermParking)', '~At(Home)'], + ['At(SFO)', '~At(SFOLongTermParking)'], + ['At(SFO)', '~At(Home)'] + ] } """ e = Expr(hla.name, hla.args) @@ -779,7 +1349,7 @@ def result(problem, action): def job_shop_problem(): """ - [figure 11.1] JOB-SHOP-PROBLEM + [Figure 11.1] JOB-SHOP-PROBLEM A job-shop scheduling problem for assembling two cars, with resource and ordering constraints. @@ -820,3 +1390,48 @@ def job_shop_problem(): actions=actions, jobs=[job_group1, job_group2], resources=resources) + + +def go_to_sfo(): + """Go to SFO Problem""" + + go_home_sfo1 = HLA('Go(Home, SFO)', precond='At(Home) & Have(Car)', effect='At(SFO) & ~At(Home)') + go_home_sfo2 = HLA('Go(Home, SFO)', precond='At(Home)', effect='At(SFO) & ~At(Home)') + drive_home_sfoltp = HLA('Drive(Home, SFOLongTermParking)', precond='At(Home) & Have(Car)', effect='At(SFOLongTermParking) & ~At(Home)') + shuttle_sfoltp_sfo = HLA('Shuttle(SFOLongTermParking, SFO)', precond='At(SFOLongTermParking)', effect='At(SFO) & ~At(SFOLongTermParking)') + taxi_home_sfo = HLA('Taxi(Home, SFO)', precond='At(Home)', effect='At(SFO) & ~At(Home)') + + actions = [go_home_sfo1, go_home_sfo2, drive_home_sfoltp, shuttle_sfoltp_sfo, taxi_home_sfo] + + library = { + 'HLA': [ + 'Go(Home, SFO)', + 'Go(Home, SFO)', + 'Drive(Home, SFOLongTermParking)', + 'Shuttle(SFOLongTermParking, SFO)', + 'Taxi(Home, SFO)' + ], + 'steps': [ + ['Drive(Home, SFOLongTermParking)', 'Shuttle(SFOLongTermParking, SFO)'], + ['Taxi(Home, SFO)'], + [], + [], + [] + ], + 'precond': [ + ['At(Home)', 'Have(Car)'], + ['At(Home)'], + ['At(Home)', 'Have(Car)'], + ['At(SFOLongTermParking)'], + ['At(Home)'] + ], + 'effect': [ + ['At(SFO)', '~At(Home)'], + ['At(SFO)', '~At(Home)'], + ['At(SFOLongTermParking)', '~At(Home)'], + ['At(SFO)', '~At(SFOLongTermParking)'], + ['At(SFO)', '~At(Home)'] + ] + } + + return Problem(init='At(Home)', goals='At(SFO)', actions=actions), library diff --git a/pomdp.ipynb b/pomdp.ipynb deleted file mode 100644 index 
1c8391818..000000000 --- a/pomdp.ipynb +++ /dev/null @@ -1,240 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Partially Observable Markov decision processes (POMDPs)\n", - "\n", - "This Jupyter notebook acts as supporting material for POMDPs, covered in **Chapter 17 Making Complex Decisions** of the book* Artificial Intelligence: A Modern Approach*. We make use of the implementations of POMPDPs in mdp.py module. This notebook has been separated from the notebook `mdp.py` as the topics are considerably more advanced.\n", - "\n", - "**Note that it is essential to work through and understand the mdp.ipynb notebook before diving into this one.**\n", - "\n", - "Let us import everything from the mdp module to get started." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "from mdp import *\n", - "from notebook import psource, pseudocode" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## CONTENTS\n", - "\n", - "1. Overview of MDPs\n", - "2. POMDPs - a conceptual outline\n", - "3. POMDPs - a rigorous outline\n", - "4. Value Iteration\n", - " - Value Iteration Visualization" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 1. 
OVERVIEW\n", - "\n", - "We first review Markov property and MDPs as in [Section 17.1] of the book.\n", - "\n", - "- A stochastic process is said to have the **Markov property**, or to have a **Markovian transition model** if the conditional probability distribution of future states of the process (conditional on both past and present states) depends only on the present state, not on the sequence of events that preceded it.\n", - "\n", - " -- (Source: [Wikipedia](https://en.wikipedia.org/wiki/Markov_property))\n", - "\n", - "A Markov decision process or MDP is defined as:\n", - "- a sequential decision problem for a fully observable, stochastic environment with a Markovian transition model and additive rewards.\n", - "\n", - "An MDP consists of a set of states (with an initial state $s_0$); a set $A(s)$ of actions\n", - "in each state; a transition model $P(s' | s, a)$; and a reward function $R(s)$.\n", - "\n", - "The MDP seeks to make sequential decisions to occupy states so as to maximise some combination of the reward function $R(s)$.\n", - "\n", - "The characteristic problem of the MDP is hence to identify the optimal policy function $\\pi^*(s)$ that provides the _utility-maximising_ action $a$ to be taken when the current state is $s$.\n", - "\n", - "### Belief vector\n", - "\n", - "**Note**: The book refers to the _belief vector_ as the _belief state_. We use the latter terminology here to retain our ability to refer to the belief vector as a _probability distribution over states_.\n", - "\n", - "The solution of an MDP is subject to certain properties of the problem which are assumed and justified in [Section 17.1]. One critical assumption is that the agent is **fully aware of its current state at all times**.\n", - "\n", - "A tedious (but rewarding, as we will see) way of expressing this is in terms of the **belief vector** $b$ of the agent. 
The belief vector is a function mapping states to probabilities or certainties of being in those states.\n", - "\n", - "Consider an agent that is fully aware that it is in state $s_i$ in the statespace $(s_1, s_2, ... s_n)$ at the current time.\n", - "\n", - "Its belief vector is the vector $(b(s_1), b(s_2), ... b(s_n))$ given by the function $b(s)$:\n", - "\\begin{align*}\n", - "b(s) &= 0 \\quad \\text{if }s \\neq s_i \\\\ &= 1 \\quad \\text{if } s = s_i\n", - "\\end{align*}\n", - "\n", - "Note that $b(s)$ is a probability distribution that necessarily sums to $1$ over all $s$.\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": true - }, - "source": [ - "## 2. POMDPs - a conceptual outline\n", - "\n", - "The POMDP really has only two modifications to the **problem formulation** compared to the MDP.\n", - "\n", - "- **Belief state** - In the real world, the current state of an agent is often not known with complete certainty. This makes the concept of a belief vector extremely relevant. It allows the agent to represent different degrees of certainty with which it _believes_ it is in each state.\n", - "\n", - "- **Evidence percepts** - In the real world, agents often have certain kinds of evidence, collected from sensors. They can use the probability distribution of observed evidence, conditional on state, to consolidate their information. This is a known distribution $P(e\\ |\\ s)$ - $e$ being an evidence, and $s$ being the state it is conditional on.\n", - "\n", - "Consider the world we used for the MDP. \n", - "\n", - "![title](images/grid_mdp.jpg)\n", - "\n", - "#### Using the belief vector\n", - "An agent beginning at $(1, 1)$ may not be certain that it is indeed in $(1, 1)$. 
Consider a belief vector $b$ such that:\n", - "\\begin{align*}\n", - " b((1,1)) &= 0.8 \\\\\n", - " b((2,1)) &= 0.1 \\\\\n", - " b((1,2)) &= 0.1 \\\\\n", - " b(s) &= 0 \\quad \\quad \\forall \\text{ other } s\n", - "\\end{align*}\n", - "\n", - "By horizontally catenating each row, we can represent this as an 11-dimensional vector (omitting $(2, 2)$).\n", - "\n", - "Thus, taking $s_1 = (1, 1)$, $s_2 = (1, 2)$, ... $s_{11} = (4,3)$, we have $b$:\n", - "\n", - "$b = (0.8, 0.1, 0, 0, 0.1, 0, 0, 0, 0, 0, 0)$ \n", - "\n", - "This fully represents the certainty to which the agent is aware of its state.\n", - "\n", - "#### Using evidence\n", - "The evidence observed here could be the number of adjacent 'walls' or 'dead ends' observed by the agent. We assume that the agent cannot 'orient' the walls - only count them.\n", - "\n", - "In this case, $e$ can take only two values, 1 and 2. This gives $P(e\\ |\\ s)$ as:\n", - "\\begin{align*}\n", - " P(e=2\\ |\\ s) &= \\frac{1}{7} \\quad \\forall \\quad s \\in \\{s_1, s_2, s_4, s_5, s_8, s_9, s_{11}\\}\\\\\n", - " P(e=1\\ |\\ s) &= \\frac{1}{4} \\quad \\forall \\quad s \\in \\{s_3, s_6, s_7, s_{10}\\} \\\\\n", - " P(e\\ |\\ s) &= 0 \\quad \\forall \\quad \\text{ other } s, e\n", - "\\end{align*}\n", - "\n", - "Note that the implications of the evidence on the state must be known **a priori** to the agent. Ways of reliably learning this distribution from percepts are beyond the scope of this notebook." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 3. POMDPs - a rigorous outline\n", - "\n", - "A POMDP is thus a sequential decision problem for for a *partially* observable, stochastic environment with a Markovian transition model, a known 'sensor model' for inferring state from observation, and additive rewards. 
\n", - "\n", - "Practically, a POMDP has the following, which an MDP also has:\n", - "- a set of states, each denoted by $s$\n", - "- a set of actions available in each state, $A(s)$\n", - "- a reward accrued on attaining some state, $R(s)$\n", - "- a transition probability $P(s'\\ |\\ s, a)$ of action $a$ changing the state from $s$ to $s'$\n", - "\n", - "And the following, which an MDP does not:\n", - "- a sensor model $P(e\\ |\\ s)$ on evidence conditional on states\n", - "\n", - "Additionally, the POMDP is now uncertain of its current state hence has:\n", - "- a belief vector $b$ representing the certainty of being in each state (as a probability distribution)\n", - "\n", - "\n", - "#### New uncertainties\n", - "\n", - "It is useful to intuitively appreciate the new uncertainties that have arisen in the agent's awareness of its own state.\n", - "\n", - "- At any point, the agent has belief vector $b$, the distribution of its believed likelihood of being in each state $s$.\n", - "- For each of these states $s$ that the agent may **actually** be in, it has some set of actions given by $A(s)$.\n", - "- Each of these actions may transport it to some other state $s'$, assuming an initial state $s$, with probability $P(s'\\ |\\ s, a)$\n", - "- Once the action is performed, the agent receives a percept $e$. $P(e\\ |\\ s)$ now tells it the chances of having perceived $e$ for each state $s$. 
The agent must use this information to update its new belief state appropriately.\n", - "\n", - "#### Evolution of the belief vector - the `FORWARD` function\n", - "\n", - "The new belief vector $b'(s')$ after an action $a$ on the belief vector $b(s)$ and the noting of evidence $e$ is:\n", - "$$ b'(s') = \\alpha P(e\\ |\\ s') \\sum_s P(s'\\ | s, a) b(s)$$ \n", - "\n", - "where $\\alpha$ is a normalising constant (to retain the interpretation of $b$ as a probability distribution.\n", - "\n", - "This equation is just counts the sum of likelihoods of going to a state $s'$ from every possible state $s$, times the initial likelihood of being in each $s$. This is multiplied by the likelihood that the known evidence actually implies the new state $s'$. \n", - "\n", - "This function is represented as `b' = FORWARD(b, a, e)`\n", - "\n", - "#### Probability distribution of the evolving belief vector\n", - "\n", - "The goal here is to find $P(b'\\ |\\ b, a)$ - the probability that action $a$ transforms belief vector $b$ into belief vector $b'$. 
The following steps illustrate this -\n", - "\n", - "The probability of observing evidence $e$ when action $a$ is enacted on belief vector $b$ can be distributed over each possible new state $s'$ resulting from it:\n", - "\\begin{align*}\n", - " P(e\\ |\\ b, a) &= \\sum_{s'} P(e\\ |\\ b, a, s') P(s'\\ |\\ b, a) \\\\\n", - " &= \\sum_{s'} P(e\\ |\\ s') P(s'\\ |\\ b, a) \\\\\n", - " &= \\sum_{s'} P(e\\ |\\ s') \\sum_s P(s'\\ |\\ s, a) b(s)\n", - "\\end{align*}\n", - "\n", - "The probability of getting belief vector $b'$ from $b$ by application of action $a$ can thus be summed over all possible evidences $e$:\n", - "\\begin{align*}\n", - " P(b'\\ |\\ b, a) &= \\sum_{e} P(b'\\ |\\ b, a, e) P(e\\ |\\ b, a) \\\\\n", - " &= \\sum_{e} P(b'\\ |\\ b, a, e) \\sum_{s'} P(e\\ |\\ s') \\sum_s P(s'\\ |\\ s, a) b(s)\n", - "\\end{align*}\n", - "\n", - "where $P(b'\\ |\\ b, a, e) = 1$ if $b' = $ `FORWARD(b, a, e)` and $= 0$ otherwise.\n", - "\n", - "Given initial and final belief states $b$ and $b'$, the transition probabilities still depend on the action $a$ and observed evidence $e$. Some belief states may be achievable by certain actions, but have non-zero probabilities for states prohibited by the evidence $e$. Thus, the above condition thus ensures that only valid combinations of $(b', b, a, e)$ are considered.\n", - "\n", - "#### A modified rewardspace\n", - "\n", - "For MDPs, the reward space was simple - one reward per available state. However, for a belief vector $b(s)$, the expected reward is now:\n", - "$$\\rho(b) = \\sum_s b(s) R(s)$$\n", - "\n", - "Thus, as the belief vector can take infinite values of the distribution over states, so can the reward for each belief vector vary over a hyperplane in the belief space, or space of states (planes in an $N$-dimensional space are formed by a linear combination of the axes)." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.1" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/probability.ipynb b/probability.ipynb index 58e9b1994..d7f09eb3a 100644 --- a/probability.ipynb +++ b/probability.ipynb @@ -6,39 +6,221 @@ "source": [ "# Probability \n", "\n", - "This IPy notebook acts as supporting material for **Chapter 13 Quantifying Uncertainty**, **Chapter 14 Probabilistic Reasoning** and **Chapter 15 Probabilistic Reasoning over Time** of the book* Artificial Intelligence: A Modern Approach*. This notebook makes use of the implementations in probability.py module. Let us import everything from the probability module. It might be helpful to view the source of some of our implementations. Please refer to the Introductory IPy file for more details on how to do so." + "This IPy notebook acts as supporting material for topics covered in **Chapter 13 Quantifying Uncertainty**, **Chapter 14 Probabilistic Reasoning**, **Chapter 15 Probabilistic Reasoning over Time**, **Chapter 16 Making Simple Decisions** and parts of **Chapter 25 Robotics** of the book* Artificial Intelligence: A Modern Approach*. This notebook makes use of the implementations in probability.py module. Let us import everything from the probability module. It might be helpful to view the source of some of our implementations. 
Please refer to the Introductory IPy file for more details on how to do so." ] }, { "cell_type": "code", - "execution_count": 3, - "metadata": { - "collapsed": true - }, + "execution_count": 1, + "metadata": {}, "outputs": [], "source": [ "from probability import *\n", - "from notebook import *" + "from utils import print_table\n", + "from notebook import psource, pseudocode, heatmap" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## CONTENTS\n", + "- Probability Distribution\n", + " - Joint probability distribution\n", + " - Inference using full joint distributions\n", + "
\n", + "- Bayesian Networks\n", + " - BayesNode\n", + " - BayesNet\n", + " - Exact Inference in Bayesian Networks\n", + " - Enumeration\n", + " - Variable elimination\n", + " - Approximate Inference in Bayesian Networks\n", + " - Prior sample\n", + " - Rejection sampling\n", + " - Likelihood weighting\n", + " - Gibbs sampling\n", + "
\n", + "- Hidden Markov Models\n", + " - Inference in Hidden Markov Models\n", + " - Forward-backward\n", + " - Fixed lag smoothing\n", + " - Particle filtering\n", + "
\n", + "
\n", + "- Monte Carlo Localization\n", + "- Information Gathering Agent" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Probability Distribution\n", + "## PROBABILITY DISTRIBUTION\n", "\n", "Let us begin by specifying discrete probability distributions. The class **ProbDist** defines a discrete probability distribution. We name our random variable and then assign probabilities to the different values of the random variable. Assigning probabilities to the values works similar to that of using a dictionary with keys being the Value and we assign to it the probability. This is possible because of the magic methods **_ _getitem_ _** and **_ _setitem_ _** which store the probabilities in the prob dict of the object. You can keep the source window open alongside while playing with the rest of the code to get a better understanding." ] }, { "cell_type": "code", - "execution_count": 34, - "metadata": { - "collapsed": true - }, - "outputs": [], + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + " Codestin Search App\n", + " \n", + " \n", + "\n", + "\n", + "

\n", + "\n", + "
class ProbDist:\n",
+       "    """A discrete probability distribution. You name the random variable\n",
+       "    in the constructor, then assign and query probability of values.\n",
+       "    >>> P = ProbDist('Flip'); P['H'], P['T'] = 0.25, 0.75; P['H']\n",
+       "    0.25\n",
+       "    >>> P = ProbDist('X', {'lo': 125, 'med': 375, 'hi': 500})\n",
+       "    >>> P['lo'], P['med'], P['hi']\n",
+       "    (0.125, 0.375, 0.5)\n",
+       "    """\n",
+       "\n",
+       "    def __init__(self, varname='?', freqs=None):\n",
+       "        """If freqs is given, it is a dictionary of values - frequency pairs,\n",
+       "        then ProbDist is normalized."""\n",
+       "        self.prob = {}\n",
+       "        self.varname = varname\n",
+       "        self.values = []\n",
+       "        if freqs:\n",
+       "            for (v, p) in freqs.items():\n",
+       "                self[v] = p\n",
+       "            self.normalize()\n",
+       "\n",
+       "    def __getitem__(self, val):\n",
+       "        """Given a value, return P(value)."""\n",
+       "        try:\n",
+       "            return self.prob[val]\n",
+       "        except KeyError:\n",
+       "            return 0\n",
+       "\n",
+       "    def __setitem__(self, val, p):\n",
+       "        """Set P(val) = p."""\n",
+       "        if val not in self.values:\n",
+       "            self.values.append(val)\n",
+       "        self.prob[val] = p\n",
+       "\n",
+       "    def normalize(self):\n",
+       "        """Make sure the probabilities of all values sum to 1.\n",
+       "        Returns the normalized distribution.\n",
+       "        Raises a ZeroDivisionError if the sum of the values is 0."""\n",
+       "        total = sum(self.prob.values())\n",
+       "        if not isclose(total, 1.0):\n",
+       "            for val in self.prob:\n",
+       "                self.prob[val] /= total\n",
+       "        return self\n",
+       "\n",
+       "    def show_approx(self, numfmt='{:.3g}'):\n",
+       "        """Show the probabilities rounded and sorted by key, for the\n",
+       "        sake of portable doctests."""\n",
+       "        return ', '.join([('{}: ' + numfmt).format(v, p)\n",
+       "                          for (v, p) in sorted(self.prob.items())])\n",
+       "\n",
+       "    def __repr__(self):\n",
+       "        return "P({})".format(self.varname)\n",
+       "
\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ - "%psource ProbDist" + "psource(ProbDist)" ] }, { @@ -67,12 +249,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The first parameter of the constructor **varname** has a default value of '?'. So if the name is not passed it defaults to ?. The keyword argument **freqs** can be a dictionary of values of random variable:probability. These are then normalized such that the probability values sum upto 1 using the **normalize** method." + "The first parameter of the constructor **varname** has a default value of '?'. So if the name is not passed it defaults to ?. The keyword argument **freqs** can be a dictionary of values of random variable: probability. These are then normalized such that the probability values sum upto 1 using the **normalize** method." ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -81,7 +263,7 @@ "'?'" ] }, - "execution_count": 3, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -93,7 +275,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -102,7 +284,7 @@ "(0.125, 0.375, 0.5)" ] }, - "execution_count": 4, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -120,16 +302,16 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "['high', 'medium', 'low']" + "['low', 'medium', 'high']" ] }, - "execution_count": 5, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -142,12 +324,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The distribution by default is not normalized if values are added incremently. We can still force normalization by invoking the **normalize** method." 
+ "The distribution by default is not normalized if values are added incrementally. We can still force normalization by invoking the **normalize** method." ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -156,7 +338,7 @@ "(50, 114, 64)" ] }, - "execution_count": 6, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -171,7 +353,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -180,7 +362,7 @@ "(0.21929824561403508, 0.5, 0.2807017543859649)" ] }, - "execution_count": 7, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -199,7 +381,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -208,7 +390,7 @@ "'Cat: 0.219, Dog: 0.5, Mice: 0.281'" ] }, - "execution_count": 8, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -231,7 +413,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -240,7 +422,7 @@ "(8, 10)" ] }, - "execution_count": 9, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -258,56 +440,6 @@ "_A probability model is completely determined by the joint distribution for all of the random variables._ (**Section 13.3**) The probability module implements these as the class **JointProbDist** which inherits from the **ProbDist** class. This class specifies a discrete probability distribute over a set of variables. " ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "%psource JointProbDist" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Values for a Joint Distribution is a an ordered tuple in which each item corresponds to the value associate with a particular variable. 
For Joint Distribution of X, Y where X, Y take integer values this can be something like (18, 19).\n", - "\n", - "To specify a Joint distribution we first need an ordered list of variables." - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "P(['X', 'Y'])" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "variables = ['X', 'Y']\n", - "j = JointProbDist(variables)\n", - "j" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Like the **ProbDist** class **JointProbDist** also employes magic methods to assign probability to different values.\n", - "The probability can be assigned in either of the two formats for all possible values of the distribution. The **event_values** call inside **_ _getitem_ _** and **_ _setitem_ _** does the required processing to make this work." - ] - }, { "cell_type": "code", "execution_count": 11, @@ -315,202 +447,277 @@ "outputs": [ { "data": { - "text/plain": [ - "(0.2, 0.5)" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "j[1,1] = 0.2\n", - "j[dict(X=0, Y=1)] = 0.5\n", - "\n", - "(j[1,1], j[0,1])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "It is also possible to list all the values for a particular variable using the **values** method." - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[1, 0]" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "j.values('X')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Inference Using Full Joint Distributions\n", - "\n", - "In this section we use Full Joint Distributions to calculate the posterior distribution given some evidence. 
We represent evidence by using a python dictionary with variables as dict keys and dict values representing the values.\n", - "\n", - "This is illustrated in **Section 13.3** of the book. The functions **enumerate_joint** and **enumerate_joint_ask** implement this functionality. Under the hood they implement **Equation 13.9** from the book.\n", - "\n", - "$$\\textbf{P}(X | \\textbf{e}) = α \\textbf{P}(X, \\textbf{e}) = α \\sum_{y} \\textbf{P}(X, \\textbf{e}, \\textbf{y})$$\n", - "\n", - "Here **α** is the normalizing factor. **X** is our query variable and **e** is the evidence. According to the equation we enumerate on the remaining variables **y** (not in evidence or query variable) i.e. all possible combinations of **y**\n", - "\n", - "We will be using the same example as the book. Let us create the full joint distribution from **Figure 13.3**. " - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "collapsed": true - }, - "outputs": [], + "text/html": [ + "\n", + "\n", + "\n", + "\n", + " Codestin Search App\n", + " \n", + " \n", + "\n", + "\n", + "

\n", + "\n", + "
class JointProbDist(ProbDist):\n",
+       "    """A discrete probability distribute over a set of variables.\n",
+       "    >>> P = JointProbDist(['X', 'Y']); P[1, 1] = 0.25\n",
+       "    >>> P[1, 1]\n",
+       "    0.25\n",
+       "    >>> P[dict(X=0, Y=1)] = 0.5\n",
+       "    >>> P[dict(X=0, Y=1)]\n",
+       "    0.5"""\n",
+       "\n",
+       "    def __init__(self, variables):\n",
+       "        self.prob = {}\n",
+       "        self.variables = variables\n",
+       "        self.vals = defaultdict(list)\n",
+       "\n",
+       "    def __getitem__(self, values):\n",
+       "        """Given a tuple or dict of values, return P(values)."""\n",
+       "        values = event_values(values, self.variables)\n",
+       "        return ProbDist.__getitem__(self, values)\n",
+       "\n",
+       "    def __setitem__(self, values, p):\n",
+       "        """Set P(values) = p.  Values can be a tuple or a dict; it must\n",
+       "        have a value for each of the variables in the joint. Also keep track\n",
+       "        of the values we have seen so far for each variable."""\n",
+       "        values = event_values(values, self.variables)\n",
+       "        self.prob[values] = p\n",
+       "        for var, val in zip(self.variables, values):\n",
+       "            if val not in self.vals[var]:\n",
+       "                self.vals[var].append(val)\n",
+       "\n",
+       "    def values(self, var):\n",
+       "        """Return the set of possible values for a variable."""\n",
+       "        return self.vals[var]\n",
+       "\n",
+       "    def __repr__(self):\n",
+       "        return "P({})".format(self.variables)\n",
+       "
\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ - "full_joint = JointProbDist(['Cavity', 'Toothache', 'Catch'])\n", - "full_joint[dict(Cavity=True, Toothache=True, Catch=True)] = 0.108\n", - "full_joint[dict(Cavity=True, Toothache=True, Catch=False)] = 0.012\n", - "full_joint[dict(Cavity=True, Toothache=False, Catch=True)] = 0.016\n", - "full_joint[dict(Cavity=True, Toothache=False, Catch=False)] = 0.064\n", - "full_joint[dict(Cavity=False, Toothache=True, Catch=True)] = 0.072\n", - "full_joint[dict(Cavity=False, Toothache=False, Catch=True)] = 0.144\n", - "full_joint[dict(Cavity=False, Toothache=True, Catch=False)] = 0.008\n", - "full_joint[dict(Cavity=False, Toothache=False, Catch=False)] = 0.576" + "psource(JointProbDist)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Let us now look at the **enumerate_joint** function returns the sum of those entries in P consistent with e,provided variables is P's remaining variables (the ones not in e). Here, P refers to the full joint distribution. The function uses a recursive call in its implementation. The first parameter **variables** refers to remaining variables. The function in each recursive call keeps on variable constant while varying others." + "A value of a Joint Distribution is an ordered tuple in which each item corresponds to the value associated with a particular variable. For a Joint Distribution of X, Y where X, Y take integer values this can be something like (18, 19).\n", + "\n", + "To specify a Joint distribution we first need an ordered list of variables."
] }, { "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "P(['X', 'Y'])" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "psource(enumerate_joint)" + "variables = ['X', 'Y']\n", + "j = JointProbDist(variables)\n", + "j" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Let us assume we want to find **P(Toothache=True)**. This can be obtained by marginalization (**Equation 13.6**). We can use **enumerate_joint** to solve for this by taking Toothache=True as our evidence. **enumerate_joint** will return the sum of probabilities consistent with evidence i.e. Marginal Probability." + "Like the **ProbDist** class, **JointProbDist** also employs magic methods to assign probability to different values.\n", + "The probability can be assigned in either of the two formats for all possible values of the distribution. The **event_values** call inside **_ _getitem_ _** and **_ _setitem_ _** does the required processing to make this work." ] }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "0.19999999999999998" + "(0.2, 0.5)" ] }, - "execution_count": 15, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "evidence = dict(Toothache=True)\n", - "variables = ['Cavity', 'Catch'] # variables not part of evidence\n", - "ans1 = enumerate_joint(variables, evidence, full_joint)\n", - "ans1" + "j[1,1] = 0.2\n", + "j[dict(X=0, Y=1)] = 0.5\n", + "\n", + "(j[1,1], j[0,1])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "You can verify the result from our definition of the full joint distribution. 
We can use the same function to find more complex probabilities like **P(Cavity=True and Toothache=True)** " + "It is also possible to list all the values for a particular variable using the **values** method." ] }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "0.12" + "[1, 0]" ] }, - "execution_count": 16, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "evidence = dict(Cavity=True, Toothache=True)\n", - "variables = ['Catch'] # variables not part of evidence\n", - "ans2 = enumerate_joint(variables, evidence, full_joint)\n", - "ans2" + "j.values('X')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Being able to find sum of probabilities satisfying given evidence allows us to compute conditional probabilities like **P(Cavity=True | Toothache=True)** as we can rewrite this as $$P(Cavity=True | Toothache = True) = \\frac{P(Cavity=True \\ and \\ Toothache=True)}{P(Toothache=True)}$$\n", + "## Inference Using Full Joint Distributions\n", "\n", - "We have already calculated both the numerator and denominator." + "In this section we use Full Joint Distributions to calculate the posterior distribution given some evidence. We represent evidence by using a python dictionary with variables as dict keys and dict values representing the values.\n", + "\n", + "This is illustrated in **Section 13.3** of the book. The functions **enumerate_joint** and **enumerate_joint_ask** implement this functionality. Under the hood they implement **Equation 13.9** from the book.\n", + "\n", + "$$\\textbf{P}(X | \\textbf{e}) = \\alpha \\textbf{P}(X, \\textbf{e}) = \\alpha \\sum_{y} \\textbf{P}(X, \\textbf{e}, \\textbf{y})$$\n", + "\n", + "Here **α** is the normalizing factor. **X** is our query variable and **e** is the evidence. According to the equation we enumerate on the remaining variables **y** (not in evidence or query variable) i.e. 
all possible combinations of **y**\n", + "\n", + "We will be using the same example as the book. Let us create the full joint distribution from **Figure 13.3**. " ] }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 15, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0.6" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "ans2/ans1" + "full_joint = JointProbDist(['Cavity', 'Toothache', 'Catch'])\n", + "full_joint[dict(Cavity=True, Toothache=True, Catch=True)] = 0.108\n", + "full_joint[dict(Cavity=True, Toothache=True, Catch=False)] = 0.012\n", + "full_joint[dict(Cavity=True, Toothache=False, Catch=True)] = 0.016\n", + "full_joint[dict(Cavity=True, Toothache=False, Catch=False)] = 0.064\n", + "full_joint[dict(Cavity=False, Toothache=True, Catch=True)] = 0.072\n", + "full_joint[dict(Cavity=False, Toothache=False, Catch=True)] = 0.144\n", + "full_joint[dict(Cavity=False, Toothache=True, Catch=False)] = 0.008\n", + "full_joint[dict(Cavity=False, Toothache=False, Catch=False)] = 0.576" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "We might be interested in the probability distribution of a particular variable conditioned on some evidence. This can involve doing calculations like above for each possible value of the variable. This has been implemented slightly differently using normalization in the function **enumerate_joint_ask** which returns a probability distribution over the values of the variable **X**, given the {var:val} observations **e**, in the **JointProbDist P**. The implementation of this function calls **enumerate_joint** for each value of the query variable and passes **extended evidence** with the new evidence having **X = xi**. This is followed by normalization of the obtained distribution." 
+ "Let us now look at the **enumerate_joint** function, which returns the sum of those entries in P consistent with e, provided variables is P's remaining variables (the ones not in e). Here, P refers to the full joint distribution. The function uses a recursive call in its implementation. The first parameter **variables** refers to remaining variables. The function in each recursive call keeps one variable constant while varying others." ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -602,20 +809,14 @@ "\n", "

\n", "\n", - "
def enumerate_joint_ask(X, e, P):\n",
-       "    """Return a probability distribution over the values of the variable X,\n",
-       "    given the {var:val} observations e, in the JointProbDist P. [Section 13.3]\n",
-       "    >>> P = JointProbDist(['X', 'Y'])\n",
-       "    >>> P[0,0] = 0.25; P[0,1] = 0.5; P[1,1] = P[2,1] = 0.125\n",
-       "    >>> enumerate_joint_ask('X', dict(Y=1), P).show_approx()\n",
-       "    '0: 0.667, 1: 0.167, 2: 0.167'\n",
-       "    """\n",
-       "    assert X not in e, "Query variable must be distinct from evidence"\n",
-       "    Q = ProbDist(X)  # probability distribution for X, initially empty\n",
-       "    Y = [v for v in P.variables if v != X and v not in e]  # hidden variables.\n",
-       "    for xi in P.values(X):\n",
-       "        Q[xi] = enumerate_joint(Y, extend(e, X, xi), P)\n",
-       "    return Q.normalize()\n",
+       "
def enumerate_joint(variables, e, P):\n",
+       "    """Return the sum of those entries in P consistent with e,\n",
+       "    provided variables is P's remaining variables (the ones not in e)."""\n",
+       "    if not variables:\n",
+       "        return P[e]\n",
+       "    Y, rest = variables[0], variables[1:]\n",
+       "    return sum([enumerate_joint(rest, extend(e, Y, y), P)\n",
+       "                for y in P.values(Y)])\n",
        "
\n", "\n", "\n" @@ -624,1226 +825,5535 @@ "" ] }, - "execution_count": 18, "metadata": {}, - "output_type": "execute_result" + "output_type": "display_data" } ], "source": [ - "psource(enumerate_joint_ask)" + "psource(enumerate_joint)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Let us find **P(Cavity | Toothache=True)** using **enumerate_joint_ask**." + "Let us assume we want to find **P(Toothache=True)**. This can be obtained by marginalization (**Equation 13.6**). We can use **enumerate_joint** to solve for this by taking Toothache=True as our evidence. **enumerate_joint** will return the sum of probabilities consistent with evidence i.e. Marginal Probability." ] }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "(0.6, 0.39999999999999997)" + "0.19999999999999998" ] }, - "execution_count": 19, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "query_variable = 'Cavity'\n", "evidence = dict(Toothache=True)\n", - "ans = enumerate_joint_ask(query_variable, evidence, full_joint)\n", - "(ans[True], ans[False])" + "variables = ['Cavity', 'Catch'] # variables not part of evidence\n", + "ans1 = enumerate_joint(variables, evidence, full_joint)\n", + "ans1" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "You can verify that the first value is the same as we obtained earlier by manual calculation." + "You can verify the result from our definition of the full joint distribution. 
We can use the same function to find more complex probabilities like **P(Cavity=True and Toothache=True)** " ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 18, "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.12" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "## Bayesian Networks\n", - "\n", - "A Bayesian network is a representation of the joint probability distribution encoding a collection of conditional independence statements.\n", - "\n", - "A Bayes Network is implemented as the class **BayesNet**. It consisits of a collection of nodes implemented by the class **BayesNode**. The implementation in the above mentioned classes focuses only on boolean variables. Each node is associated with a variable and it contains a **conditional probabilty table (cpt)**. The **cpt** represents the probability distribution of the variable conditioned on its parents **P(X | parents)**.\n", - "\n", - "Let us dive into the **BayesNode** implementation." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "psource(BayesNode)" + "evidence = dict(Cavity=True, Toothache=True)\n", + "variables = ['Catch'] # variables not part of evidence\n", + "ans2 = enumerate_joint(variables, evidence, full_joint)\n", + "ans2" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "The constructor takes in the name of **variable**, **parents** and **cpt**. Here **variable** is a the name of the variable like 'Earthquake'. **parents** should a list or space separate string with variable names of parents. The conditional probability table is a dict {(v1, v2, ...): p, ...}, the distribution P(X=true | parent1=v1, parent2=v2, ...) = p. Here the keys are combination of boolean values that the parents take. The length and order of the values in keys should be same as the supplied **parent** list/string. 
In all cases the probability of X being false is left implicit, since it follows from P(X=true).\n", - "\n", - "The example below where we implement the network shown in **Figure 14.3** of the book will make this more clear.\n", - "\n", - "\n", + "Being able to find sum of probabilities satisfying given evidence allows us to compute conditional probabilities like **P(Cavity=True | Toothache=True)** as we can rewrite this as $$P(Cavity=True | Toothache = True) = \\frac{P(Cavity=True \\ and \\ Toothache=True)}{P(Toothache=True)}$$\n", "\n", - "The alarm node can be made as follows: " - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "alarm_node = BayesNode('Alarm', ['Burglary', 'Earthquake'], \n", - " {(True, True): 0.95,(True, False): 0.94, (False, True): 0.29, (False, False): 0.001})" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "It is possible to avoid using a tuple when there is only a single parent. So an alternative format for the **cpt** is" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "john_node = BayesNode('JohnCalls', ['Alarm'], {True: 0.90, False: 0.05})\n", - "mary_node = BayesNode('MaryCalls', 'Alarm', {(True, ): 0.70, (False, ): 0.01}) # Using string for parents.\n", - "# Equivalant to john_node definition." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The general format used for the alarm node always holds. For nodes with no parents we can also use. 
" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "burglary_node = BayesNode('Burglary', '', 0.001)\n", - "earthquake_node = BayesNode('Earthquake', '', 0.002)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "It is possible to use the node for lookup function using the **p** method. The method takes in two arguments **value** and **event**. Event must be a dict of the type {variable:values, ..} The value corresponds to the value of the variable we are interested in (False or True).The method returns the conditional probability **P(X=value | parents=parent_values)**, where parent_values are the values of parents in event. (event must assign each parent a value.)" + "We have already calculated both the numerator and denominator." ] }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "0.09999999999999998" + "0.6" ] }, - "execution_count": 24, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "john_node.p(False, {'Alarm': True, 'Burglary': True}) # P(JohnCalls=False | Alarm=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "With all the information about nodes present it is possible to construct a Bayes Network using **BayesNet**. The **BayesNet** class does not take in nodes as input but instead takes a list of **node_specs**. An entry in **node_specs** is a tuple of the parameters we use to construct a **BayesNode** namely **(X, parents, cpt)**. **node_specs** must be ordered with parents before children." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "psource(BayesNet)" + "ans2/ans1" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "The constructor of **BayesNet** takes each item in **node_specs** and adds a **BayesNode** to its **nodes** object variable by calling the **add** method. **add** in turn adds node to the net. Its parents must already be in the net, and its variable must not. Thus add allows us to grow a **BayesNet** given its parents are already present.\n", - "\n", - "**burglary** global is an instance of **BayesNet** corresponding to the above example.\n", - "\n", - " T, F = True, False\n", - "\n", - " burglary = BayesNet([\n", - " ('Burglary', '', 0.001),\n", - " ('Earthquake', '', 0.002),\n", - " ('Alarm', 'Burglary Earthquake',\n", - " {(T, T): 0.95, (T, F): 0.94, (F, T): 0.29, (F, F): 0.001}),\n", - " ('JohnCalls', 'Alarm', {T: 0.90, F: 0.05}),\n", - " ('MaryCalls', 'Alarm', {T: 0.70, F: 0.01})\n", - " ])" + "We might be interested in the probability distribution of a particular variable conditioned on some evidence. This can involve doing calculations like above for each possible value of the variable. This has been implemented slightly differently using normalization in the function **enumerate_joint_ask** which returns a probability distribution over the values of the variable **X**, given the {var:val} observations **e**, in the **JointProbDist P**. The implementation of this function calls **enumerate_joint** for each value of the query variable and passes **extended evidence** with the new evidence having **X = xi**. This is followed by normalization of the obtained distribution." ] }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 20, "metadata": {}, "outputs": [ { "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + " Codestin Search App\n", + " \n", + " \n", + "\n", + "\n", + "

\n", + "\n", + "
def enumerate_joint_ask(X, e, P):\n",
+       "    """Return a probability distribution over the values of the variable X,\n",
+       "    given the {var:val} observations e, in the JointProbDist P. [Section 13.3]\n",
+       "    >>> P = JointProbDist(['X', 'Y'])\n",
+       "    >>> P[0,0] = 0.25; P[0,1] = 0.5; P[1,1] = P[2,1] = 0.125\n",
+       "    >>> enumerate_joint_ask('X', dict(Y=1), P).show_approx()\n",
+       "    '0: 0.667, 1: 0.167, 2: 0.167'\n",
+       "    """\n",
+       "    assert X not in e, "Query variable must be distinct from evidence"\n",
+       "    Q = ProbDist(X)  # probability distribution for X, initially empty\n",
+       "    Y = [v for v in P.variables if v != X and v not in e]  # hidden variables.\n",
+       "    for xi in P.values(X):\n",
+       "        Q[xi] = enumerate_joint(Y, extend(e, X, xi), P)\n",
+       "    return Q.normalize()\n",
+       "
\n", + "\n", + "\n" + ], "text/plain": [ - "BayesNet([('Burglary', ''), ('Earthquake', ''), ('Alarm', 'Burglary Earthquake'), ('JohnCalls', 'Alarm'), ('MaryCalls', 'Alarm')])" + "" ] }, - "execution_count": 26, "metadata": {}, - "output_type": "execute_result" + "output_type": "display_data" } ], "source": [ - "burglary" + "psource(enumerate_joint_ask)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "**BayesNet** method **variable_node** allows to reach **BayesNode** instances inside a Bayes Net. It is possible to modify the **cpt** of the nodes directly using this method." + "Let us find **P(Cavity | Toothache=True)** using **enumerate_joint_ask**." ] }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "probability.BayesNode" + "(0.6, 0.39999999999999997)" ] }, - "execution_count": 27, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "type(burglary.variable_node('Alarm'))" + "query_variable = 'Cavity'\n", + "evidence = dict(Toothache=True)\n", + "ans = enumerate_joint_ask(query_variable, evidence, full_joint)\n", + "(ans[True], ans[False])" ] }, { - "cell_type": "code", - "execution_count": 28, + "cell_type": "markdown", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{(False, False): 0.001,\n", - " (False, True): 0.29,\n", - " (True, False): 0.94,\n", - " (True, True): 0.95}" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "burglary.variable_node('Alarm').cpt" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Exact Inference in Bayesian Networks\n", - "\n", - "A Bayes Network is a more compact representation of the full joint distribution and like full joint distributions allows us to do inference i.e. 
answer questions about probability distributions of random variables given some evidence.\n", - "\n", - "Exact algorithms don't scale well for larger networks. Approximate algorithms are explained in the next section.\n", - "\n", - "### Inference by Enumeration\n", - "\n", - "We apply techniques similar to those used for **enumerate_joint_ask** and **enumerate_joint** to draw inference from Bayesian Networks. **enumeration_ask** and **enumerate_all** implement the algorithm described in **Figure 14.9** of the book." - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": { - "collapsed": true - }, - "outputs": [], "source": [ - "psource(enumerate_all)" + "You can verify that the first value is the same as we obtained earlier by manual calculation." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "**enumerate__all** recursively evaluates a general form of the **Equation 14.4** in the book.\n", + "## BAYESIAN NETWORKS\n", "\n", - "$$\\textbf{P}(X | \\textbf{e}) = α \\textbf{P}(X, \\textbf{e}) = α \\sum_{y} \\textbf{P}(X, \\textbf{e}, \\textbf{y})$$ \n", + "A Bayesian network is a representation of the joint probability distribution encoding a collection of conditional independence statements.\n", "\n", - "such that **P(X, e, y)** is written in the form of product of conditional probabilities **P(variable | parents(variable))** from the Bayesian Network.\n", + "A Bayes Network is implemented as the class **BayesNet**. It consists of a collection of nodes implemented by the class **BayesNode**. The implementation in the above-mentioned classes focuses only on boolean variables. Each node is associated with a variable and it contains a **conditional probability table (cpt)**. The **cpt** represents the probability distribution of the variable conditioned on its parents **P(X | parents)**.\n", "\n", - "**enumeration_ask** calls **enumerate_all** on each value of query variable **X** and finally normalizes them. 
\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "psource(enumeration_ask)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let us solve the problem of finding out **P(Burglary=True | JohnCalls=True, MaryCalls=True)** using the **burglary** network.**enumeration_ask** takes three arguments **X** = variable name, **e** = Evidence (in form a dict like previously explained), **bn** = The Bayes Net to do inference on." + "Let us dive into the **BayesNode** implementation." ] }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 22, "metadata": {}, "outputs": [ { "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + " Codestin Search App\n", + " \n", + " \n", + "\n", + "\n", + "

\n", + "\n", + "
class BayesNode:\n",
+       "    """A conditional probability distribution for a boolean variable,\n",
+       "    P(X | parents). Part of a BayesNet."""\n",
+       "\n",
+       "    def __init__(self, X, parents, cpt):\n",
+       "        """X is a variable name, and parents a sequence of variable\n",
+       "        names or a space-separated string.  cpt, the conditional\n",
+       "        probability table, takes one of these forms:\n",
+       "\n",
+       "        * A number, the unconditional probability P(X=true). You can\n",
+       "          use this form when there are no parents.\n",
+       "\n",
+       "        * A dict {v: p, ...}, the conditional probability distribution\n",
+       "          P(X=true | parent=v) = p. When there's just one parent.\n",
+       "\n",
+       "        * A dict {(v1, v2, ...): p, ...}, the distribution P(X=true |\n",
+       "          parent1=v1, parent2=v2, ...) = p. Each key must have as many\n",
+       "          values as there are parents. You can use this form always;\n",
+       "          the first two are just conveniences.\n",
+       "\n",
+       "        In all cases the probability of X being false is left implicit,\n",
+       "        since it follows from P(X=true).\n",
+       "\n",
+       "        >>> X = BayesNode('X', '', 0.2)\n",
+       "        >>> Y = BayesNode('Y', 'P', {T: 0.2, F: 0.7})\n",
+       "        >>> Z = BayesNode('Z', 'P Q',\n",
+       "        ...    {(T, T): 0.2, (T, F): 0.3, (F, T): 0.5, (F, F): 0.7})\n",
+       "        """\n",
+       "        if isinstance(parents, str):\n",
+       "            parents = parents.split()\n",
+       "\n",
+       "        # We store the table always in the third form above.\n",
+       "        if isinstance(cpt, (float, int)):  # no parents, 0-tuple\n",
+       "            cpt = {(): cpt}\n",
+       "        elif isinstance(cpt, dict):\n",
+       "            # one parent, 1-tuple\n",
+       "            if cpt and isinstance(list(cpt.keys())[0], bool):\n",
+       "                cpt = {(v,): p for v, p in cpt.items()}\n",
+       "\n",
+       "        assert isinstance(cpt, dict)\n",
+       "        for vs, p in cpt.items():\n",
+       "            assert isinstance(vs, tuple) and len(vs) == len(parents)\n",
+       "            assert all(isinstance(v, bool) for v in vs)\n",
+       "            assert 0 <= p <= 1\n",
+       "\n",
+       "        self.variable = X\n",
+       "        self.parents = parents\n",
+       "        self.cpt = cpt\n",
+       "        self.children = []\n",
+       "\n",
+       "    def p(self, value, event):\n",
+       "        """Return the conditional probability\n",
+       "        P(X=value | parents=parent_values), where parent_values\n",
+       "        are the values of parents in event. (event must assign each\n",
+       "        parent a value.)\n",
+       "        >>> bn = BayesNode('X', 'Burglary', {T: 0.2, F: 0.625})\n",
+       "        >>> bn.p(False, {'Burglary': False, 'Earthquake': True})\n",
+       "        0.375"""\n",
+       "        assert isinstance(value, bool)\n",
+       "        ptrue = self.cpt[event_values(event, self.parents)]\n",
+       "        return ptrue if value else 1 - ptrue\n",
+       "\n",
+       "    def sample(self, event):\n",
+       "        """Sample from the distribution for this variable conditioned\n",
+       "        on event's values for parent_variables. That is, return True/False\n",
+       "        at random according with the conditional probability given the\n",
+       "        parents."""\n",
+       "        return probability(self.p(True, event))\n",
+       "\n",
+       "    def __repr__(self):\n",
+       "        return repr((self.variable, ' '.join(self.parents)))\n",
+       "
\n", + "\n", + "\n" + ], "text/plain": [ - "0.2841718353643929" + "" ] }, - "execution_count": 30, "metadata": {}, - "output_type": "execute_result" + "output_type": "display_data" } ], "source": [ - "ans_dist = enumeration_ask('Burglary', {'JohnCalls': True, 'MaryCalls': True}, burglary)\n", - "ans_dist[True]" + "psource(BayesNode)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Variable Elimination\n", - "\n", - "The enumeration algorithm can be improved substantially by eliminating repeated calculations. In enumeration we join the joint of all hidden variables. This is of exponential size for the number of hidden variables. Variable elimination employes interleaving join and marginalization.\n", - "\n", - "Before we look into the implementation of Variable Elimination we must first familiarize ourselves with Factors. \n", - "\n", - "In general we call a multidimensional array of type P(Y1 ... Yn | X1 ... Xm) a factor where some of Xs and Ys maybe assigned values. Factors are implemented in the probability module as the class **Factor**. They take as input **variables** and **cpt**. \n", + "The constructor takes in the name of **variable**, **parents** and **cpt**. Here **variable** is the name of the variable like 'Earthquake'. **parents** should be a list or a space-separated string with variable names of parents. The conditional probability table is a dict {(v1, v2, ...): p, ...}, the distribution P(X=true | parent1=v1, parent2=v2, ...) = p. Here the keys are combinations of boolean values that the parents take. The length and order of the values in keys should be the same as the supplied **parents** list/string. 
In all cases the probability of X being false is left implicit, since it follows from P(X=true).\n", "\n", + "The example below where we implement the network shown in **Figure 14.3** of the book will make this more clear.\n", "\n", - "#### Helper Functions\n", + "\n", "\n", - "There are certain helper functions that help creating the **cpt** for the Factor given the evidence. Let us explore them one by one." + "The alarm node can be made as follows: " ] }, { "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, + "execution_count": 23, + "metadata": {}, "outputs": [], "source": [ - "psource( make_factor)" + "alarm_node = BayesNode('Alarm', ['Burglary', 'Earthquake'], \n", + " {(True, True): 0.95,(True, False): 0.94, (False, True): 0.29, (False, False): 0.001})" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "**make_factor** is used to create the **cpt** and **variables** that will be passed to the constructor of **Factor**. We use **make_factor** for each variable. It takes in the arguments **var** the particular variable, **e** the evidence we want to do inference on, **bn** the bayes network.\n", - "\n", - "Here **variables** for each node refers to a list consisting of the variable itself and the parents minus any variables that are part of the evidence. This is created by finding the **node.parents** and filtering out those that are not part of the evidence.\n", - "\n", - "The **cpt** created is the one similar to the original **cpt** of the node with only rows that agree with the evidence." + "It is possible to avoid using a tuple when there is only a single parent. 
So an alternative format for the **cpt** is" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, + "execution_count": 24, + "metadata": {}, "outputs": [], "source": [ - "psource(all_events)" + "john_node = BayesNode('JohnCalls', ['Alarm'], {True: 0.90, False: 0.05})\n", + "mary_node = BayesNode('MaryCalls', 'Alarm', {(True, ): 0.70, (False, ): 0.01}) # Using string for parents.\n", + "# Equivalent in form to the john_node definition." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "The **all_events** function is a recursive generator function which yields a key for the orignal **cpt** which is part of the node. This works by extending evidence related to the node, thus all the output from **all_events** only includes events that support the evidence. Given **all_events** is a generator function one such event is returned on every call. \n", - "\n", - "We can try this out using the example on **Page 524** of the book. We will make **f**5(A) = P(m | A)" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "f5 = make_factor('MaryCalls', {'JohnCalls': True, 'MaryCalls': True}, burglary)" + "The general format used for the alarm node always holds. For nodes with no parents we can also use a single number, the unconditional probability P(X=true). 
" ] }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 25, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 32, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "f5" + "burglary_node = BayesNode('Burglary', '', 0.001)\n", + "earthquake_node = BayesNode('Earthquake', '', 0.002)" ] }, { - "cell_type": "code", - "execution_count": 33, + "cell_type": "markdown", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{(False,): 0.01, (True,): 0.7}" - ] - }, - "execution_count": 33, - "metadata": {}, - "output_type": "execute_result" - } - ], "source": [ - "f5.cpt" + "It is possible to use the node for lookup function using the **p** method. The method takes in two arguments **value** and **event**. Event must be a dict of the type {variable:values, ..} The value corresponds to the value of the variable we are interested in (False or True).The method returns the conditional probability **P(X=value | parents=parent_values)**, where parent_values are the values of parents in event. (event must assign each parent a value.)" ] }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "['Alarm']" + "0.09999999999999998" ] }, - "execution_count": 34, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "f5.variables" + "john_node.p(False, {'Alarm': True, 'Burglary': True}) # P(JohnCalls=False | Alarm=True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Here **f5.cpt** False key gives probability for **P(MaryCalls=True | Alarm = False)**. Due to our representation where we only store probabilities for only in cases where the node variable is True this is the same as the **cpt** of the BayesNode. 
Let us try a somewhat different example from the book where evidence is that the Alarm = True" + "With all the information about nodes present it is possible to construct a Bayes Network using **BayesNet**. The **BayesNet** class does not take in nodes as input but instead takes a list of **node_specs**. An entry in **node_specs** is a tuple of the parameters we use to construct a **BayesNode** namely **(X, parents, cpt)**. **node_specs** must be ordered with parents before children." ] }, { "cell_type": "code", - "execution_count": 35, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "new_factor = make_factor('MaryCalls', {'Alarm': True}, burglary)" - ] - }, - { - "cell_type": "code", - "execution_count": 36, + "execution_count": 27, "metadata": {}, "outputs": [ { "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + " Codestin Search App\n", + " \n", + " \n", + "\n", + "\n", + "

\n", + "\n", + "
class BayesNet:\n",
+       "    """Bayesian network containing only boolean-variable nodes."""\n",
+       "\n",
+       "    def __init__(self, node_specs=None):\n",
+       "        """Nodes must be ordered with parents before children."""\n",
+       "        self.nodes = []\n",
+       "        self.variables = []\n",
+       "        node_specs = node_specs or []\n",
+       "        for node_spec in node_specs:\n",
+       "            self.add(node_spec)\n",
+       "\n",
+       "    def add(self, node_spec):\n",
+       "        """Add a node to the net. Its parents must already be in the\n",
+       "        net, and its variable must not."""\n",
+       "        node = BayesNode(*node_spec)\n",
+       "        assert node.variable not in self.variables\n",
+       "        assert all((parent in self.variables) for parent in node.parents)\n",
+       "        self.nodes.append(node)\n",
+       "        self.variables.append(node.variable)\n",
+       "        for parent in node.parents:\n",
+       "            self.variable_node(parent).children.append(node)\n",
+       "\n",
+       "    def variable_node(self, var):\n",
+       "        """Return the node for the variable named var.\n",
+       "        >>> burglary.variable_node('Burglary').variable\n",
+       "        'Burglary'"""\n",
+       "        for n in self.nodes:\n",
+       "            if n.variable == var:\n",
+       "                return n\n",
+       "        raise Exception("No such variable: {}".format(var))\n",
+       "\n",
+       "    def variable_values(self, var):\n",
+       "        """Return the domain of var."""\n",
+       "        return [True, False]\n",
+       "\n",
+       "    def __repr__(self):\n",
+       "        return 'BayesNet({0!r})'.format(self.nodes)\n",
+       "
\n", + "\n", + "\n" + ], "text/plain": [ - "{(False,): 0.30000000000000004, (True,): 0.7}" + "" ] }, - "execution_count": 36, "metadata": {}, - "output_type": "execute_result" + "output_type": "display_data" } ], "source": [ - "new_factor.cpt" + "psource(BayesNet)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Here the **cpt** is for **P(MaryCalls | Alarm = True)**. Therefore the probabilities for True and False sum up to one. Note the difference between both the cases. Again the only rows included are those consistent with the evidence.\n", + "The constructor of **BayesNet** takes each item in **node_specs** and adds a **BayesNode** to its **nodes** object variable by calling the **add** method. **add** in turn adds node to the net. Its parents must already be in the net, and its variable must not. Thus add allows us to grow a **BayesNet** given its parents are already present.\n", "\n", - "#### Operations on Factors\n", + "**burglary** global is an instance of **BayesNet** corresponding to the above example.\n", "\n", - "We are interested in two kinds of operations on factors. **Pointwise Product** which is used to created joint distributions and **Summing Out** which is used for marginalization." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "psource(Factor.pointwise_product)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Factor.pointwise_product** implements a method of creating a joint via combining two factors. We take the union of **variables** of both the factors and then generate the **cpt** for the new factor using **all_events** function. Note that the given we have eliminated rows that are not consistent with the evidence. Pointwise product assigns new probabilities by multiplying rows similar to that in a database join." 
+ " T, F = True, False\n", + "\n", + " burglary = BayesNet([\n", + " ('Burglary', '', 0.001),\n", + " ('Earthquake', '', 0.002),\n", + " ('Alarm', 'Burglary Earthquake',\n", + " {(T, T): 0.95, (T, F): 0.94, (F, T): 0.29, (F, F): 0.001}),\n", + " ('JohnCalls', 'Alarm', {T: 0.90, F: 0.05}),\n", + " ('MaryCalls', 'Alarm', {T: 0.70, F: 0.01})\n", + " ])" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "psource(pointwise_product)" - ] - }, - { - "cell_type": "markdown", + "execution_count": 28, "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "BayesNet([('Burglary', ''), ('Earthquake', ''), ('Alarm', 'Burglary Earthquake'), ('JohnCalls', 'Alarm'), ('MaryCalls', 'Alarm')])" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "**pointwise_product** extends this operation to more than two operands where it is done sequentially in pairs of two." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "psource(Factor.sum_out)" + "burglary" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "**Factor.sum_out** makes a factor eliminating a variable by summing over its values. Again **events_all** is used to generate combinations for the rest of the variables." + "**BayesNet** method **variable_node** allows to reach **BayesNode** instances inside a Bayes Net. It is possible to modify the **cpt** of the nodes directly using this method." ] }, { "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "psource(sum_out)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**sum_out** uses both **Factor.sum_out** and **pointwise_product** to finally eliminate a particular variable from all factors by summing over its values." 
- ] - }, - { - "cell_type": "markdown", + "execution_count": 29, "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "probability.BayesNode" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "#### Elimination Ask\n", - "\n", - "The algorithm described in **Figure 14.11** of the book is implemented by the function **elimination_ask**. We use this for inference. The key idea is that we eliminate the hidden variables by interleaving joining and marginalization. It takes in 3 arguments **X** the query variable, **e** the evidence variable and **bn** the Bayes network. \n", - "\n", - "The algorithm creates factors out of Bayes Nodes in reverse order and eliminates hidden variables using **sum_out**. Finally it takes a point wise product of all factors and normalizes. Let us finally solve the problem of inferring \n", - "\n", - "**P(Burglary=True | JohnCalls=True, MaryCalls=True)** using variable elimination." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "psource(elimination_ask)" + "type(burglary.variable_node('Alarm'))" ] }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'False: 0.716, True: 0.284'" + "{(False, False): 0.001,\n", + " (False, True): 0.29,\n", + " (True, False): 0.94,\n", + " (True, True): 0.95}" ] }, - "execution_count": 38, + "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "elimination_ask('Burglary', dict(JohnCalls=True, MaryCalls=True), burglary).show_approx()" + "burglary.variable_node('Alarm').cpt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Approximate Inference in Bayesian Networks\n", + "## Exact Inference in Bayesian Networks\n", "\n", - "Exact inference fails to scale for very large and complex Bayesian Networks. 
This section covers implementation of randomized sampling algorithms, also called Monte Carlo algorithms." + "A Bayes Network is a more compact representation of the full joint distribution and like full joint distributions allows us to do inference i.e. answer questions about probability distributions of random variables given some evidence.\n", + "\n", + "Exact algorithms don't scale well for larger networks. Approximate algorithms are explained in the next section.\n", + "\n", + "### Inference by Enumeration\n", + "\n", + "We apply techniques similar to those used for **enumerate_joint_ask** and **enumerate_joint** to draw inference from Bayesian Networks. **enumeration_ask** and **enumerate_all** implement the algorithm described in **Figure 14.9** of the book." ] }, { "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "psource(BayesNode.sample)" - ] - }, - { - "cell_type": "markdown", + "execution_count": 31, "metadata": {}, - "source": [ - "Before we consider the different algorithms in this section let us look at the **BayesNode.sample** method. It samples from the distribution for this variable conditioned on event's values for parent_variables. That is, return True/False at random according to with the conditional probability given the parents. The **probability** function is a simple helper from **utils** module which returns True with the probability passed to it.\n", - "\n", + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + " Codestin Search App\n", + " \n", + " \n", + "\n", + "\n", + "

\n", + "\n", + "
def enumerate_all(variables, e, bn):\n",
+       "    """Return the sum of those entries in P(variables | e{others})\n",
+       "    consistent with e, where P is the joint distribution represented\n",
+       "    by bn, and e{others} means e restricted to bn's other variables\n",
+       "    (the ones other than variables). Parents must precede children in variables."""\n",
+       "    if not variables:\n",
+       "        return 1.0\n",
+       "    Y, rest = variables[0], variables[1:]\n",
+       "    Ynode = bn.variable_node(Y)\n",
+       "    if Y in e:\n",
+       "        return Ynode.p(e[Y], e) * enumerate_all(rest, e, bn)\n",
+       "    else:\n",
+       "        return sum(Ynode.p(y, e) * enumerate_all(rest, extend(e, Y, y), bn)\n",
+       "                   for y in bn.variable_values(Y))\n",
+       "
\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "psource(enumerate_all)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**enumerate_all** recursively evaluates a general form of the **Equation 14.4** in the book.\n", + "\n", + "$$\\textbf{P}(X | \\textbf{e}) = α \\textbf{P}(X, \\textbf{e}) = α \\sum_{y} \\textbf{P}(X, \\textbf{e}, \\textbf{y})$$ \n", + "\n", + "such that **P(X, e, y)** is written in the form of product of conditional probabilities **P(variable | parents(variable))** from the Bayesian Network.\n", + "\n", + "**enumeration_ask** calls **enumerate_all** on each value of query variable **X** and finally normalizes them. \n" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + " Codestin Search App\n", + " \n", + " \n", + "\n", + "\n", + "

\n", + "\n", + "
def enumeration_ask(X, e, bn):\n",
+       "    """Return the conditional probability distribution of variable X\n",
+       "    given evidence e, from BayesNet bn. [Figure 14.9]\n",
+       "    >>> enumeration_ask('Burglary', dict(JohnCalls=T, MaryCalls=T), burglary\n",
+       "    ...  ).show_approx()\n",
+       "    'False: 0.716, True: 0.284'"""\n",
+       "    assert X not in e, "Query variable must be distinct from evidence"\n",
+       "    Q = ProbDist(X)\n",
+       "    for xi in bn.variable_values(X):\n",
+       "        Q[xi] = enumerate_all(bn.variables, extend(e, X, xi), bn)\n",
+       "    return Q.normalize()\n",
+       "
\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "psource(enumeration_ask)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let us solve the problem of finding out **P(Burglary=True | JohnCalls=True, MaryCalls=True)** using the **burglary** network. **enumeration_ask** takes three arguments **X** = variable name, **e** = Evidence (in form a dict like previously explained), **bn** = The Bayes Net to do inference on." + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.2841718353643929" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ans_dist = enumeration_ask('Burglary', {'JohnCalls': True, 'MaryCalls': True}, burglary)\n", + "ans_dist[True]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Variable Elimination\n", + "\n", + "The enumeration algorithm can be improved substantially by eliminating repeated calculations. In enumeration we join the joint of all hidden variables. This is of exponential size for the number of hidden variables. Variable elimination employes interleaving join and marginalization.\n", + "\n", + "Before we look into the implementation of Variable Elimination we must first familiarize ourselves with Factors. \n", + "\n", + "In general we call a multidimensional array of type P(Y1 ... Yn | X1 ... Xm) a factor where some of Xs and Ys maybe assigned values. Factors are implemented in the probability module as the class **Factor**. They take as input **variables** and **cpt**. \n", + "\n", + "\n", + "#### Helper Functions\n", + "\n", + "There are certain helper functions that help creating the **cpt** for the Factor given the evidence. Let us explore them one by one." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + " Codestin Search App\n", + " \n", + " \n", + "\n", + "\n", + "

\n", + "\n", + "
def make_factor(var, e, bn):\n",
+       "    """Return the factor for var in bn's joint distribution given e.\n",
+       "    That is, bn's full joint distribution, projected to accord with e,\n",
+       "    is the pointwise product of these factors for bn's variables."""\n",
+       "    node = bn.variable_node(var)\n",
+       "    variables = [X for X in [var] + node.parents if X not in e]\n",
+       "    cpt = {event_values(e1, variables): node.p(e1[var], e1)\n",
+       "           for e1 in all_events(variables, bn, e)}\n",
+       "    return Factor(variables, cpt)\n",
+       "
\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "psource(make_factor)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**make_factor** is used to create the **cpt** and **variables** that will be passed to the constructor of **Factor**. We use **make_factor** for each variable. It takes in the arguments **var** the particular variable, **e** the evidence we want to do inference on, **bn** the bayes network.\n", + "\n", + "Here **variables** for each node refers to a list consisting of the variable itself and the parents minus any variables that are part of the evidence. This is created by taking the variable together with its **node.parents** and filtering out those that are part of the evidence.\n", + "\n", + "The **cpt** created is the one similar to the original **cpt** of the node with only rows that agree with the evidence." + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + " Codestin Search App\n", + " \n", + " \n", + "\n", + "\n", + "

\n", + "\n", + "
def all_events(variables, bn, e):\n",
+       "    """Yield every way of extending e with values for all variables."""\n",
+       "    if not variables:\n",
+       "        yield e\n",
+       "    else:\n",
+       "        X, rest = variables[0], variables[1:]\n",
+       "        for e1 in all_events(rest, bn, e):\n",
+       "            for x in bn.variable_values(X):\n",
+       "                yield extend(e1, X, x)\n",
+       "
\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "psource(all_events)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The **all_events** function is a recursive generator function which yields a key for the orignal **cpt** which is part of the node. This works by extending evidence related to the node, thus all the output from **all_events** only includes events that support the evidence. Given **all_events** is a generator function one such event is returned on every call. \n", + "\n", + "We can try this out using the example on **Page 524** of the book. We will make **f**5(A) = P(m | A)" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [], + "source": [ + "f5 = make_factor('MaryCalls', {'JohnCalls': True, 'MaryCalls': True}, burglary)" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "f5" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{(False,): 0.01, (True,): 0.7}" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "f5.cpt" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['Alarm']" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "f5.variables" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here **f5.cpt** False key gives probability for **P(MaryCalls=True | Alarm = False)**. 
Due to our representation where we only store probabilities for only in cases where the node variable is True this is the same as the **cpt** of the BayesNode. Let us try a somewhat different example from the book where evidence is that the Alarm = True" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [], + "source": [ + "new_factor = make_factor('MaryCalls', {'Alarm': True}, burglary)" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{(False,): 0.30000000000000004, (True,): 0.7}" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "new_factor.cpt" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here the **cpt** is for **P(MaryCalls | Alarm = True)**. Therefore the probabilities for True and False sum up to one. Note the difference between both the cases. Again the only rows included are those consistent with the evidence.\n", + "\n", + "#### Operations on Factors\n", + "\n", + "We are interested in two kinds of operations on factors. **Pointwise Product** which is used to created joint distributions and **Summing Out** which is used for marginalization." + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + " Codestin Search App\n", + " \n", + " \n", + "\n", + "\n", + "

\n", + "\n", + "
    def pointwise_product(self, other, bn):\n",
+       "        """Multiply two factors, combining their variables."""\n",
+       "        variables = list(set(self.variables) | set(other.variables))\n",
+       "        cpt = {event_values(e, variables): self.p(e) * other.p(e)\n",
+       "               for e in all_events(variables, bn, {})}\n",
+       "        return Factor(variables, cpt)\n",
+       "
\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "psource(Factor.pointwise_product)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Factor.pointwise_product** implements a method of creating a joint via combining two factors. We take the union of **variables** of both the factors and then generate the **cpt** for the new factor using the **all_events** function. Note that we have already eliminated rows that are not consistent with the evidence. Pointwise product assigns new probabilities by multiplying rows similar to that in a database join." + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + " Codestin Search App\n", + " \n", + " \n", + "\n", + "\n", + "

\n", + "\n", + "
def pointwise_product(factors, bn):\n",
+       "    return reduce(lambda f, g: f.pointwise_product(g, bn), factors)\n",
+       "
\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "psource(pointwise_product)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**pointwise_product** extends this operation to more than two operands where it is done sequentially in pairs of two." + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + " Codestin Search App\n", + " \n", + " \n", + "\n", + "\n", + "

\n", + "\n", + "
    def sum_out(self, var, bn):\n",
+       "        """Make a factor eliminating var by summing over its values."""\n",
+       "        variables = [X for X in self.variables if X != var]\n",
+       "        cpt = {event_values(e, variables): sum(self.p(extend(e, var, val))\n",
+       "                                               for val in bn.variable_values(var))\n",
+       "               for e in all_events(variables, bn, {})}\n",
+       "        return Factor(variables, cpt)\n",
+       "
\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "psource(Factor.sum_out)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Factor.sum_out** makes a factor eliminating a variable by summing over its values. Again **all_events** is used to generate combinations for the rest of the variables." + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + " Codestin Search App\n", + " \n", + " \n", + "\n", + "\n", + "

\n", + "\n", + "
def sum_out(var, factors, bn):\n",
+       "    """Eliminate var from all factors by summing over its values."""\n",
+       "    result, var_factors = [], []\n",
+       "    for f in factors:\n",
+       "        (var_factors if var in f.variables else result).append(f)\n",
+       "    result.append(pointwise_product(var_factors, bn).sum_out(var, bn))\n",
+       "    return result\n",
+       "
\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "psource(sum_out)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**sum_out** uses both **Factor.sum_out** and **pointwise_product** to finally eliminate a particular variable from all factors by summing over its values." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Elimination Ask\n", + "\n", + "The algorithm described in **Figure 14.11** of the book is implemented by the function **elimination_ask**. We use this for inference. The key idea is that we eliminate the hidden variables by interleaving joining and marginalization. It takes in 3 arguments **X** the query variable, **e** the evidence variable and **bn** the Bayes network. \n", + "\n", + "The algorithm creates factors out of Bayes Nodes in reverse order and eliminates hidden variables using **sum_out**. Finally it takes a point wise product of all factors and normalizes. Let us finally solve the problem of inferring \n", + "\n", + "**P(Burglary=True | JohnCalls=True, MaryCalls=True)** using variable elimination." + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + " Codestin Search App\n", + " \n", + " \n", + "\n", + "\n", + "

\n", + "\n", + "
def elimination_ask(X, e, bn):\n",
+       "    """Compute bn's P(X|e) by variable elimination. [Figure 14.11]\n",
+       "    >>> elimination_ask('Burglary', dict(JohnCalls=T, MaryCalls=T), burglary\n",
+       "    ...  ).show_approx()\n",
+       "    'False: 0.716, True: 0.284'"""\n",
+       "    assert X not in e, "Query variable must be distinct from evidence"\n",
+       "    factors = []\n",
+       "    for var in reversed(bn.variables):\n",
+       "        factors.append(make_factor(var, e, bn))\n",
+       "        if is_hidden(var, X, e):\n",
+       "            factors = sum_out(var, factors, bn)\n",
+       "    return pointwise_product(factors, bn).normalize()\n",
+       "
\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "psource(elimination_ask)" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'False: 0.716, True: 0.284'" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "elimination_ask('Burglary', dict(JohnCalls=True, MaryCalls=True), burglary).show_approx()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Runtime comparison\n", + "Let's see how the runtimes of these two algorithms compare.\n", + "We expect variable elimination to outperform enumeration by a large margin as we reduce the number of repetitive calculations significantly." + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "367 µs ± 126 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n" + ] + } + ], + "source": [ + "%%timeit\n", + "enumeration_ask('Burglary', dict(JohnCalls=True, MaryCalls=True), burglary).show_approx()" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "241 µs ± 64.6 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n" + ] + } + ], + "source": [ + "%%timeit\n", + "elimination_ask('Burglary', dict(JohnCalls=True, MaryCalls=True), burglary).show_approx()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We observe that variable elimination was faster than enumeration as we had expected but the gain in speed is not a lot, in fact it is just about 30% faster.\n", + "
\n", + "This happened because the bayesian network in question is pretty small, with just 5 nodes, some of which aren't even required in the inference process.\n", + "For more complicated networks, variable elimination will be significantly faster and runtime will reduce not just by a constant factor, but by a polynomial factor proportional to the number of nodes, due to the reduction in repeated calculations." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Approximate Inference in Bayesian Networks\n", + "\n", + "Exact inference fails to scale for very large and complex Bayesian Networks. This section covers implementation of randomized sampling algorithms, also called Monte Carlo algorithms." + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + " Codestin Search App\n", + " \n", + " \n", + "\n", + "\n", + "

\n", + "\n", + "
    def sample(self, event):\n",
+       "        """Sample from the distribution for this variable conditioned\n",
+       "        on event's values for parent_variables. That is, return True/False\n",
+       "        at random according with the conditional probability given the\n",
+       "        parents."""\n",
+       "        return probability(self.p(True, event))\n",
+       "
\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "psource(BayesNode.sample)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Before we consider the different algorithms in this section let us look at the **BayesNode.sample** method. It samples from the distribution for this variable conditioned on event's values for parent_variables. That is, return True/False at random according to with the conditional probability given the parents. The **probability** function is a simple helper from **utils** module which returns True with the probability passed to it.\n", + "\n", "### Prior Sampling\n", "\n", - "The idea of Prior Sampling is to sample from the Bayesian Network in a topological order. We start at the top of the network and sample as per **P(Xi | parents(Xi)** i.e. the probability distribution from which the value is sampled is conditioned on the values already assigned to the variable's parents. This can be thought of as a simulation." + "The idea of Prior Sampling is to sample from the Bayesian Network in a topological order. We start at the top of the network and sample as per **P(Xi | parents(Xi)** i.e. the probability distribution from which the value is sampled is conditioned on the values already assigned to the variable's parents. This can be thought of as a simulation." + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + " Codestin Search App\n", + " \n", + " \n", + "\n", + "\n", + "

\n", + "\n", + "
def prior_sample(bn):\n",
+       "    """Randomly sample from bn's full joint distribution. The result\n",
+       "    is a {variable: value} dict. [Figure 14.13]"""\n",
+       "    event = {}\n",
+       "    for node in bn.nodes:\n",
+       "        event[node.variable] = node.sample(event)\n",
+       "    return event\n",
+       "
\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "psource(prior_sample)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The function **prior_sample** implements the algorithm described in **Figure 14.13** of the book. Nodes are sampled in the topological order. The old value of the event is passed as evidence for parent values. We will use the Bayesian Network in **Figure 14.12** to try out the **prior_sample**\n", + "\n", + "\n", + "\n", + "Traversing the graph in topological order is important.\n", + "There are two possible topological orderings for this particular directed acyclic graph.\n", + "
\n", + "1. `Cloudy -> Sprinkler -> Rain -> Wet Grass`\n", + "2. `Cloudy -> Rain -> Sprinkler -> Wet Grass`\n", + "
\n", + "
\n", + "We can follow any of the two orderings to sample from the network.\n", + "Any ordering other than these two, however, cannot be used.\n", + "
\n", + "One way to think about this is that `Cloudy` can be seen as a precondition of both `Rain` and `Sprinkler` and just like we have seen in planning, preconditions need to be satisfied before a certain action can be executed.\n", + "
\n", + "We store the samples on the observations. Let us find **P(Rain=True)** by taking 1000 random samples from the network." + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [], + "source": [ + "N = 1000\n", + "all_observations = [prior_sample(sprinkler) for x in range(N)]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we filter to get the observations where Rain = True" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [], + "source": [ + "rain_true = [observation for observation in all_observations if observation['Rain'] == True]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Finally, we can find **P(Rain=True)**" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.496\n" + ] + } + ], + "source": [ + "answer = len(rain_true) / N\n", + "print(answer)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Sampling this another time might give different results as we have no control over the distribution of the random samples" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.503\n" + ] + } + ], + "source": [ + "N = 1000\n", + "all_observations = [prior_sample(sprinkler) for x in range(N)]\n", + "rain_true = [observation for observation in all_observations if observation['Rain'] == True]\n", + "answer = len(rain_true) / N\n", + "print(answer)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To evaluate a conditional distribution. We can use a two-step filtering process. We first separate out the variables that are consistent with the evidence. Then for each value of query variable, we can find probabilities. 
For example to find **P(Cloudy=True | Rain=True)**. We have already filtered out the values consistent with our evidence in **rain_true**. Now we apply a second filtering step on **rain_true** to find **P(Rain=True and Cloudy=True)**" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.8091451292246521\n" + ] + } + ], + "source": [ + "rain_and_cloudy = [observation for observation in rain_true if observation['Cloudy'] == True]\n", + "answer = len(rain_and_cloudy) / len(rain_true)\n", + "print(answer)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Rejection Sampling\n", + "\n", + "Rejection Sampling is based on an idea similar to what we did just now. \n", + "First, it generates samples from the prior distribution specified by the network. \n", + "Then, it rejects all those that do not match the evidence. \n", + "
\n", + "Rejection sampling is advantageous only when we know the query beforehand.\n", + "While prior sampling generally works for any query, it might fail in some scenarios.\n", + "
\n", + "Let's say we have a generic Bayesian network and we have evidence `e`, and we want to know how many times a state `A` is true, given evidence `e` is true.\n", + "Normally, prior sampling can answer this question, but let's assume that the probability of evidence `e` being true in our actual probability distribution is very small.\n", + "In this situation, it might be possible that sampling never encounters a data-point where `e` is true.\n", + "If our sampled data has no instance of `e` being true, the estimate of `P(e)` is 0, and therefore `P(A | e) = P(A, e) / P(e) = 0/0`, which is undefined.\n", + "We cannot find the required value using this sample.\n", + "
\n", + "We can definitely increase the number of sample points, but we can never guarantee that we will encounter a case where `e` is true (assuming our actual probability distribution has at least one case where `e` is true).\n", + "To guarantee this, we would have to consider every single data point, which means we lose the speed advantage that approximation provides us and we essentially have to calculate the exact inference model of the Bayesian network.\n", + "
\n", + "
\n", + "Rejection sampling will be useful in this situation, as we already know the query.\n", + "
\n", + "While sampling from the network, we will reject any sample which is inconsistent with the evidence variables of the given query (in this example, the only evidence variable is `e`).\n", + "We will only consider samples that do not violate **any** of the evidence variables.\n", + "In this way, we will have enough data with the required evidence to infer queries involving a subset of that evidence.\n", + "
\n", + "
\n", + "The function **rejection_sampling** implements the algorithm described by **Figure 14.14**" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + " Codestin Search App\n", + " \n", + " \n", + "\n", + "\n", + "

\n", + "\n", + "
def rejection_sampling(X, e, bn, N):\n",
+       "    """Estimate the probability distribution of variable X given\n",
+       "    evidence e in BayesNet bn, using N samples.  [Figure 14.14]\n",
+       "    Raises a ZeroDivisionError if all the N samples are rejected,\n",
+       "    i.e., inconsistent with e.\n",
+       "    >>> random.seed(47)\n",
+       "    >>> rejection_sampling('Burglary', dict(JohnCalls=T, MaryCalls=T),\n",
+       "    ...   burglary, 10000).show_approx()\n",
+       "    'False: 0.7, True: 0.3'\n",
+       "    """\n",
+       "    counts = {x: 0 for x in bn.variable_values(X)}  # bold N in [Figure 14.14]\n",
+       "    for j in range(N):\n",
+       "        sample = prior_sample(bn)  # boldface x in [Figure 14.14]\n",
+       "        if consistent_with(sample, e):\n",
+       "            counts[sample[X]] += 1\n",
+       "    return ProbDist(X, counts)\n",
+       "
\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "psource(rejection_sampling)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The function keeps counts of each of the possible values of the Query variable and increases the count when we see an observation consistent with the evidence. It takes in input parameters **X** - The Query Variable, **e** - evidence, **bn** - Bayes net and **N** - number of prior samples to generate.\n", + "\n", + "**consistent_with** is used to check consistency." + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + " Codestin Search App\n", + " \n", + " \n", + "\n", + "\n", + "

\n", + "\n", + "
def consistent_with(event, evidence):\n",
+       "    """Is event consistent with the given evidence?"""\n",
+       "    return all(evidence.get(k, v) == v\n",
+       "               for k, v in event.items())\n",
+       "
\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "psource(consistent_with)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To answer **P(Cloudy=True | Rain=True)**" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.7660377358490567" + ] + }, + "execution_count": 59, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "p = rejection_sampling('Cloudy', dict(Rain=True), sprinkler, 1000)\n", + "p[True]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Likelihood Weighting\n", + "\n", + "Rejection sampling takes a long time to run when the probability of finding consistent evidence is low. It is also slow for larger networks and more evidence variables.\n", + "Rejection sampling tends to reject a lot of samples if our evidence consists of a large number of variables. Likelihood Weighting solves this by fixing the evidence (i.e. not sampling it) and then using weights to make sure that our overall sampling is still consistent.\n", + "\n", + "The pseudocode in **Figure 14.15** is implemented as **likelihood_weighting** and **weighted_sample**." + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + " Codestin Search App\n", + " \n", + " \n", + "\n", + "\n", + "

\n", + "\n", + "
def weighted_sample(bn, e):\n",
+       "    """Sample an event from bn that's consistent with the evidence e;\n",
+       "    return the event and its weight, the likelihood that the event\n",
+       "    accords to the evidence."""\n",
+       "    w = 1\n",
+       "    event = dict(e)  # boldface x in [Figure 14.15]\n",
+       "    for node in bn.nodes:\n",
+       "        Xi = node.variable\n",
+       "        if Xi in e:\n",
+       "            w *= node.p(e[Xi], event)\n",
+       "        else:\n",
+       "            event[Xi] = node.sample(event)\n",
+       "    return event, w\n",
+       "
\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "psource(weighted_sample)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "**weighted_sample** samples an event from Bayesian Network that's consistent with the evidence **e** and returns the event and its weight, the likelihood that the event accords to the evidence. It takes in two parameters **bn** the Bayesian Network and **e** the evidence.\n", + "\n", + "The weight is obtained by multiplying **P(xi | parents(xi))** for each node in evidence. We set the values of **event = evidence** at the start of the function." + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "({'Cloudy': True, 'Rain': True, 'Sprinkler': False, 'WetGrass': True}, 0.8)" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "weighted_sample(sprinkler, dict(Rain=True))" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + " Codestin Search App\n", + " \n", + " \n", + "\n", + "\n", + "

\n", + "\n", + "
def likelihood_weighting(X, e, bn, N):\n",
+       "    """Estimate the probability distribution of variable X given\n",
+       "    evidence e in BayesNet bn.  [Figure 14.15]\n",
+       "    >>> random.seed(1017)\n",
+       "    >>> likelihood_weighting('Burglary', dict(JohnCalls=T, MaryCalls=T),\n",
+       "    ...   burglary, 10000).show_approx()\n",
+       "    'False: 0.702, True: 0.298'\n",
+       "    """\n",
+       "    W = {x: 0 for x in bn.variable_values(X)}\n",
+       "    for j in range(N):\n",
+       "        sample, weight = weighted_sample(bn, e)  # boldface x, w in [Figure 14.15]\n",
+       "        W[sample[X]] += weight\n",
+       "    return ProbDist(X, W)\n",
+       "
\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "psource(likelihood_weighting)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**likelihood_weighting** implements the algorithm to solve our inference problem. The code is similar to **rejection_sampling** but instead of adding one for each sample we add the weight obtained from **weighted_sample**." + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'False: 0.194, True: 0.806'" + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "likelihood_weighting('Cloudy', dict(Rain=True), sprinkler, 200).show_approx()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Gibbs Sampling\n", + "\n", + "In likelihood weighting, it is possible to obtain low weights in cases where the evidence variables reside at the bottom of the Bayesian Network. This can happen because influence only propagates downwards in likelihood weighting.\n", + "\n", + "Gibbs Sampling solves this. The implementation of **Figure 14.16** is provided in the function **gibbs_ask** " + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + " Codestin Search App\n", + " \n", + " \n", + "\n", + "\n", + "

\n", + "\n", + "
def gibbs_ask(X, e, bn, N):\n",
+       "    """[Figure 14.16]"""\n",
+       "    assert X not in e, "Query variable must be distinct from evidence"\n",
+       "    counts = {x: 0 for x in bn.variable_values(X)}  # bold N in [Figure 14.16]\n",
+       "    Z = [var for var in bn.variables if var not in e]\n",
+       "    state = dict(e)  # boldface x in [Figure 14.16]\n",
+       "    for Zi in Z:\n",
+       "        state[Zi] = random.choice(bn.variable_values(Zi))\n",
+       "    for j in range(N):\n",
+       "        for Zi in Z:\n",
+       "            state[Zi] = markov_blanket_sample(Zi, state, bn)\n",
+       "            counts[state[X]] += 1\n",
+       "    return ProbDist(X, counts)\n",
+       "
\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "psource(gibbs_ask)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In **gibbs_ask** we initialize the non-evidence variables to random values. And then select non-evidence variables and sample it from **P(Variable | value in the current state of all remaining vars) ** repeatedly sample. In practice, we speed this up by using **markov_blanket_sample** instead. This works because terms not involving the variable get canceled in the calculation. The arguments for **gibbs_ask** are similar to **likelihood_weighting**" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'False: 0.175, True: 0.825'" + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gibbs_ask('Cloudy', dict(Rain=True), sprinkler, 200).show_approx()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Runtime analysis\n", + "Let's take a look at how much time each algorithm takes." + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "11.4 ms ± 4.1 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" + ] + } + ], + "source": [ + "%%timeit\n", + "all_observations = [prior_sample(sprinkler) for x in range(1000)]\n", + "rain_true = [observation for observation in all_observations if observation['Rain'] == True]\n", + "len([observation for observation in rain_true if observation['Cloudy'] == True]) / len(rain_true)" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "8.63 ms ± 272 µs per loop (mean ± std. dev. 
of 7 runs, 100 loops each)\n" + ] + } + ], + "source": [ + "%%timeit\n", + "rejection_sampling('Cloudy', dict(Rain=True), sprinkler, 1000)" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1.96 ms ± 696 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n" + ] + } + ], + "source": [ + "%%timeit\n", + "likelihood_weighting('Cloudy', dict(Rain=True), sprinkler, 200)" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "7.03 ms ± 117 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" + ] + } + ], + "source": [ + "%%timeit\n", + "gibbs_ask('Cloudy', dict(Rain=True), sprinkler, 200)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As expected, all algorithms have a very similar runtime.\n", + "However, rejection sampling would be a lot faster and more accurate when the probability of finding data-points consistent with the required evidence is small.\n", + "
\n", + "Likelihood weighting is the fastest out of all as it doesn't involve rejecting samples, but also has quite a high variance." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## HIDDEN MARKOV MODELS" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Often, we need to carry out probabilistic inference on temporal data or a sequence of observations where the order of observations matters.\n", + "We require a model similar to a Bayesian Network, but one that grows over time to keep up with the latest evidence.\n", + "If you are familiar with the `mdp` module or Markov models in general, you can probably guess that a Markov model might come close to representing our problem accurately.\n", + "
\n", + "A Markov model is basically a chain-structured Bayesian Network in which there is one state for each time step and each node has an identical probability distribution.\n", + "The first node, however, has a different distribution, called the prior distribution which models the initial state of the process.\n", + "A state in a Markov model depends only on the previous state and the latest evidence and not on the states before it.\n", + "
\n", + "A **Hidden Markov Model** or **HMM** is a special case of a Markov model in which the state of the process is described by a single discrete random variable.\n", + "The possible values of the variable are the possible states of the world.\n", + "
\n", + "But what if we want to model a process with two or more state variables?\n", + "In that case, we can still fit the process into the HMM framework by redefining our state variables as a single \"megavariable\".\n", + "We do this because there are standard, optimized algorithms for carrying out inference on HMMs.\n", + "An HMM is very similar to an MDP, but we don't have the option of taking actions as in MDPs; instead, the process carries on as new evidence appears.\n", + "
\n", + "If a HMM is truncated at a fixed length, it becomes a Bayesian network and general BN inference can be used on it to answer queries.\n", + "\n", + "Before we start, it will be helpful to understand the structure of a temporal model. We will use the example of the book with the guard and the umbrella. In this example, the state $\\textbf{X}$ is whether it is a rainy day (`X = True`) or not (`X = False`) at Day $\\textbf{t}$. In the sensor or observation model, the observation or evidence $\\textbf{U}$ is whether the professor holds an umbrella (`U = True`) or not (`U = False`) on **Day** $\\textbf{t}$. Based on that, the transition model is \n", + "\n", + "| $X_{t-1}$ | $X_{t}$ | **P**$(X_{t}| X_{t-1})$| \n", + "| ------------- |------------- | ----------------------------------|\n", + "| ***${False}$*** | ***${False}$*** | 0.7 |\n", + "| ***${False}$*** | ***${True}$*** | 0.3 |\n", + "| ***${True}$*** | ***${False}$*** | 0.3 |\n", + "| ***${True}$*** | ***${True}$*** | 0.7 |\n", + "\n", + "And the the sensor model will be,\n", + "\n", + "| $X_{t}$ | $U_{t}$ | **P**$(U_{t}|X_{t})$| \n", + "| :-------------: |:-------------: | :------------------------:|\n", + "| ***${False}$*** | ***${True}$*** | 0.2 |\n", + "| ***${False}$*** | ***${False}$*** | 0.8 |\n", + "| ***${True}$*** | ***${True}$*** | 0.9 |\n", + "| ***${True}$*** | ***${False}$*** | 0.1 |\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "HMMs are implemented in the **`HiddenMarkovModel`** class.\n", + "Let's have a look." + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + " Codestin Search App\n", + " \n", + " \n", + "\n", + "\n", + "

\n", + "\n", + "
class HiddenMarkovModel:\n",
+       "    """A Hidden markov model which takes Transition model and Sensor model as inputs"""\n",
+       "\n",
+       "    def __init__(self, transition_model, sensor_model, prior=None):\n",
+       "        self.transition_model = transition_model\n",
+       "        self.sensor_model = sensor_model\n",
+       "        self.prior = prior or [0.5, 0.5]\n",
+       "\n",
+       "    def sensor_dist(self, ev):\n",
+       "        if ev is True:\n",
+       "            return self.sensor_model[0]\n",
+       "        else:\n",
+       "            return self.sensor_model[1]\n",
+       "
\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "psource(HiddenMarkovModel)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We instantiate the object **`hmm`** of the class using a list of lists for both the transition and the sensor model." + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": {}, + "outputs": [], + "source": [ + "umbrella_transition_model = [[0.7, 0.3], [0.3, 0.7]]\n", + "umbrella_sensor_model = [[0.9, 0.2], [0.1, 0.8]]\n", + "hmm = HiddenMarkovModel(umbrella_transition_model, umbrella_sensor_model)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The **`sensor_dist()`** method returns a list with the conditional probabilities of the sensor model." + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[0.9, 0.2]" + ] + }, + "execution_count": 72, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "hmm.sensor_dist(ev=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now that we have defined an HMM object, our task here is to compute the belief $B_{t}(x)= P(X_{t}|U_{1:t})$ given evidence **U** at each time step **t**.\n", + "
\n", + "The basic inference tasks that must be solved are:\n", + "1. **Filtering**: Computing the posterior probability distribution over the most recent state, given all the evidence up to the current time step.\n", + "2. **Prediction**: Computing the posterior probability distribution over the future state.\n", + "3. **Smoothing**: Computing the posterior probability distribution over a past state. Smoothing provides a better estimation as it incorporates more evidence.\n", + "4. **Most likely explanation**: Finding the most likely sequence of states for a given observation\n", + "5. **Learning**: The transition and sensor models can be learnt, if not yet known, just like in an information gathering agent\n", + "
\n", + "
\n", + "\n", + "There are three primary methods to carry out inference in Hidden Markov Models:\n", + "1. The Forward-Backward algorithm\n", + "2. Fixed lag smoothing\n", + "3. Particle filtering\n", + "\n", + "Let's have a look at how we can carry out inference and answer queries based on our umbrella HMM using these algorithms." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### FORWARD-BACKWARD\n", + "This is a general algorithm that works for all Markov models, not just HMMs.\n", + "In the filtering task (inference) we are given evidence **U** in each time **t** and we want to compute the belief $B_{t}(x)= P(X_{t}|U_{1:t})$. \n", + "We can think of it as a three step process:\n", + "1. In every step we start with the current belief $P(X_{t}|e_{1:t})$\n", + "2. We update it for time\n", + "3. We update it for evidence\n", + "\n", + "The forward algorithm performs the step 2 and 3 at once. It updates, or better say reweights, the initial belief using the transition and the sensor model. Let's see the umbrella example. On **Day 0** no observation is available, and for that reason we will assume that we have equal possibilities to rain or not. In the **`HiddenMarkovModel`** class, the prior probabilities for **Day 0** are by default [0.5, 0.5]. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The observation update is calculated with the **`forward()`** function. Basically, we update our belief using the observation model. The function returns a list with the probabilities of **raining or not** on **Day 1**." + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + " Codestin Search App\n", + " \n", + " \n", + "\n", + "\n", + "

\n", + "\n", + "
def forward(HMM, fv, ev):\n",
+       "    prediction = vector_add(scalar_vector_product(fv[0], HMM.transition_model[0]),\n",
+       "                            scalar_vector_product(fv[1], HMM.transition_model[1]))\n",
+       "    sensor_dist = HMM.sensor_dist(ev)\n",
+       "\n",
+       "    return normalize(element_wise_product(sensor_dist, prediction))\n",
+       "
\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "psource(forward)" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The probability of raining on day 1 is 0.82\n" + ] + } + ], + "source": [ + "umbrella_prior = [0.5, 0.5]\n", + "belief_day_1 = forward(hmm, umbrella_prior, ev=True)\n", + "print ('The probability of raining on day 1 is {:.2f}'.format(belief_day_1[0]))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In **Day 2** our initial belief is the updated belief of **Day 1**.\n", + "Again using the **`forward()`** function we can compute the probability of raining in **Day 2**" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The probability of raining in day 2 is 0.88\n" + ] + } + ], + "source": [ + "belief_day_2 = forward(hmm, belief_day_1, ev=True)\n", + "print ('The probability of raining in day 2 is {:.2f}'.format(belief_day_2[0]))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In the smoothing part we are interested in computing the distribution over past states given evidence up to the present. Assume that we want to compute the distribution for the time **k**, for $0\\leq k\n", + "\n", + "\n", + "\n", + " Codestin Search App\n", + " \n", + " \n", + "\n", + "\n", + "

\n", + "\n", + "
def backward(HMM, b, ev):\n",
+       "    sensor_dist = HMM.sensor_dist(ev)\n",
+       "    prediction = element_wise_product(sensor_dist, b)\n",
+       "\n",
+       "    return normalize(vector_add(scalar_vector_product(prediction[0], HMM.transition_model[0]),\n",
+       "                                scalar_vector_product(prediction[1], HMM.transition_model[1])))\n",
+       "
\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "psource(backward)" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[0.6272727272727272, 0.37272727272727274]" + ] + }, + "execution_count": 77, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "b = [1, 1]\n", + "backward(hmm, b, ev=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Some may notice that the result is not the same as in the book. The main reason is that in the book the normalization step is not used. If we want to normalize the result, one can use the **`normalize()`** helper function.\n", + "\n", + "In order to find the smoothed estimate for raining in **Day k**, we will use the **`forward_backward()`** function. As in the example in the book, the umbrella is observed in both days and the prior distribution is [0.5, 0.5]" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "### AIMA3e\n", + "__function__ FORWARD-BACKWARD(__ev__, _prior_) __returns__ a vector of probability distributions \n", + " __inputs__: __ev__, a vector of evidence values for steps 1,…,_t_ \n", + "     _prior_, the prior distribution on the initial state, __P__(__X__0) \n", + " __local variables__: __fv__, a vector of forward messages for steps 0,…,_t_ \n", + "        __b__, a representation of the backward message, initially all 1s \n", + "        __sv__, a vector of smoothed estimates for steps 1,…,_t_ \n", + "\n", + " __fv__\\[0\\] ← _prior_ \n", + " __for__ _i_ = 1 __to__ _t_ __do__ \n", + "   __fv__\\[_i_\\] ← FORWARD(__fv__\\[_i_ − 1\\], __ev__\\[_i_\\]) \n", + " __for__ _i_ = _t_ __downto__ 1 __do__ \n", + "   __sv__\\[_i_\\] ← NORMALIZE(__fv__\\[_i_\\] × __b__) \n", + "   __b__ ← BACKWARD(__b__, __ev__\\[_i_\\]) \n", + " 
__return__ __sv__\n", + "\n", + "---\n", + "__Figure ??__ The forward\\-backward algorithm for smoothing: computing posterior probabilities of a sequence of states given a sequence of observations. The FORWARD and BACKWARD operators are defined by Equations (__??__) and (__??__), respectively." + ], + "text/plain": [ + "" + ] + }, + "execution_count": 79, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pseudocode('Forward-Backward')" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The probability of raining in Day 0 is 0.65 and in Day 1 is 0.88\n" + ] + } + ], + "source": [ + "umbrella_prior = [0.5, 0.5]\n", + "prob = forward_backward(hmm, ev=[T, T], prior=umbrella_prior)\n", + "print ('The probability of raining in Day 0 is {:.2f} and in Day 1 is {:.2f}'.format(prob[0][0], prob[1][0]))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "Since HMMs are represented as single variable systems, we can represent the transition model and sensor model as matrices.\n", + "The `forward_backward` algorithm can be easily carried out on this representation (as we have done here) with a time complexity of $O({S}^{2} t)$ where t is the length of the sequence and each step multiplies a vector of size $S$ with a matrix of dimensions $SxS$.\n", + "
\n", + "Additionally, the forward pass stores $t$ vectors of size $S$ which makes the auxiliary space requirement equivalent to $O(St)$.\n", + "
\n", + "
\n", + "Is there any way we can improve the time or space complexity?\n", + "
\n", + "Fortunately, the matrix representation of HMM properties allows us to do so.\n", + "
\n", + "If $f$ and $b$ represent the forward and backward messages respectively, we can modify the smoothing algorithm by first\n", + "running the standard forward pass to compute $f_{t:t}$ (forgetting all the intermediate results) and then running\n", + "backward pass for both $b$ and $f$ together, using them to compute the smoothed estimate at each step.\n", + "This optimization reduces the auxiliary space requirement to constant (irrespective of the length of the sequence) provided\n", + "the transition matrix is invertible and the sensor model has no zeros (which is sometimes hard to accomplish)\n", + "
\n", + "
\n", + "Let's look at another algorithm, that carries out smoothing in a more optimized way." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### FIXED LAG SMOOTHING\n", + "The matrix formulation allows to optimize online smoothing with a fixed lag.\n", + "
\n", + "Since smoothing can be done in constant space, there should exist an algorithm whose time complexity is independent of the length of the lag.\n", + "For smoothing a time slice $t - d$ where $d$ is the lag, we need to compute $\\alpha f_{1:t-d} \\times b_{t-d+1:t}$ incrementally.\n", + "
\n", + "As we already know, the forward equation is\n", + "
\n", + "$$f_{1:t+1} = \\alpha O_{t+1}{T}^{T}f_{1:t}$$\n", + "
\n", + "and the backward equation is\n", + "
\n", + "$$b_{k+1:t} = TO_{k+1}b_{k+2:t}$$\n", + "
\n", + "where $T$ and $O$ are the transition and sensor models respectively.\n", + "
\n", + "For smoothing, the forward message is easy to compute but there exists no simple relation between the backward message of this time step and the one at the previous time step, hence we apply the backward equation $d$ times to get\n", + "
\n", + "$$b_{t-d+1:t} = \\left ( \\prod_{i=t-d+1}^{t}{TO_i} \\right )b_{t+1:t} = B_{t-d+1:t}1$$\n", + "
\n", + "where $B_{t-d+1:t}$ is the product of the sequence of $T$ and $O$ matrices.\n", + "
\n", + "Here's how the `probability` module implements `fixed_lag_smoothing`.\n", + "
" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + " Codestin Search App\n", + " \n", + " \n", + "\n", + "\n", + "

\n", + "\n", + "
def fixed_lag_smoothing(e_t, HMM, d, ev, t):\n",
+       "    """[Figure 15.6]\n",
+       "    Smoothing algorithm with a fixed time lag of 'd' steps.\n",
+       "    Online algorithm that outputs the new smoothed estimate if observation\n",
+       "    for new time step is given."""\n",
+       "    ev.insert(0, None)\n",
+       "\n",
+       "    T_model = HMM.transition_model\n",
+       "    f = HMM.prior\n",
+       "    B = [[1, 0], [0, 1]]\n",
+       "    evidence = []\n",
+       "\n",
+       "    evidence.append(e_t)\n",
+       "    O_t = vector_to_diagonal(HMM.sensor_dist(e_t))\n",
+       "    if t > d:\n",
+       "        f = forward(HMM, f, e_t)\n",
+       "        O_tmd = vector_to_diagonal(HMM.sensor_dist(ev[t - d]))\n",
+       "        B = matrix_multiplication(inverse_matrix(O_tmd), inverse_matrix(T_model), B, T_model, O_t)\n",
+       "    else:\n",
+       "        B = matrix_multiplication(B, T_model, O_t)\n",
+       "    t += 1\n",
+       "\n",
+       "    if t > d:\n",
+       "        # always returns a 1x2 matrix\n",
+       "        return [normalize(i) for i in matrix_multiplication([f], B)][0]\n",
+       "    else:\n",
+       "        return None\n",
+       "
\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "psource(fixed_lag_smoothing)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This algorithm applies `forward` as usual and optimizes the smoothing step by using the equations above.\n", + "This optimization could be achieved only because HMM properties can be represented as matrices.\n", + "
\n", + "`vector_to_diagonal`, `matrix_multiplication` and `inverse_matrix` are matrix manipulation functions to simplify the implementation.\n", + "
\n", + "`normalize` is used to normalize the output before returning it." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here's how we can use `fixed_lag_smoothing` for inference on our umbrella HMM." + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "metadata": {}, + "outputs": [], + "source": [ + "umbrella_transition_model = [[0.7, 0.3], [0.3, 0.7]]\n", + "umbrella_sensor_model = [[0.9, 0.2], [0.1, 0.8]]\n", + "hmm = HiddenMarkovModel(umbrella_transition_model, umbrella_sensor_model)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Given evidence T, F, T, F and T, we want to calculate the probability distribution for the fourth day with a fixed lag of 2 days.\n", + "
\n", + "Let `e_t = False`" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[0.1111111111111111, 0.8888888888888888]" + ] + }, + "execution_count": 83, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "e_t = F\n", + "evidence = [T, F, T, F, T]\n", + "fixed_lag_smoothing(e_t, hmm, d=2, ev=evidence, t=4)" + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[0.9938650306748466, 0.006134969325153394]" + ] + }, + "execution_count": 84, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "e_t = T\n", + "evidence = [T, T, F, T, T]\n", + "fixed_lag_smoothing(e_t, hmm, d=1, ev=evidence, t=4)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We cannot calculate probability distributions when $t$ is less than $d$" + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "metadata": {}, + "outputs": [], + "source": [ + "fixed_lag_smoothing(e_t, hmm, d=5, ev=evidence, t=4)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As expected, the output is `None`" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### PARTICLE FILTERING\n", + "The filtering problem is too expensive to solve using the previous methods for problems with large or continuous state spaces.\n", + "Particle filtering is a method that can solve the same problem but when the state space is a lot larger, where we wouldn't be able to do these computations in a reasonable amount of time as fast, as time goes by, and we want to keep track of things as they happen.\n", + "
\n", + "The downside is that it is a sampling method and hence isn't accurate, but the more samples we're willing to take, the more accurate we'd get.\n", + "
\n", + "In this method, instead of keeping track of the probability distribution, we will drop particles in a similar proportion at the required regions.\n", + "The internal representation of this distribution is usually a list of particles with coordinates in the state-space.\n", + "A particle is just a new name for a sample.\n", + "\n", + "Particle filtering can be divided into four steps:\n", + "1. __Initialization__: \n", + "If we have some idea about the prior probability distribution, we drop the initial particles accordingly, or else we just drop them uniformly over the state space.\n", + "\n", + "2. __Forward pass__: \n", + "As time goes by and measurements come in, we are going to move the selected particles into the grid squares that make the most sense in terms of representing the distribution that we are trying to track.\n", + "When time goes by, we just loop through all our particles and try to simulate what could happen to each one of them by sampling its next position from the transition model.\n", + "This is like prior sampling - samples' frequencies reflect the transition probabilities.\n", + "If we have enough samples we are pretty close to exact values.\n", + "We work through the list of particles, one particle at a time, all we do is stochastically simulate what the outcome might be.\n", + "If we had no dimension of time, and we had no new measurements come in, this would be exactly the same as what we did in prior sampling.\n", + "\n", + "3. __Reweight__:\n", + "As observations come in, don't sample the observations, fix them and downweight the samples based on the evidence just like in likelihood weighting.\n", + "$$w(x) = P(e|x)$$\n", + "$$B(X) \\propto P(e|X)B'(X)$$\n", + "
\n", + "As before, the probabilities don't sum to one, since most have been downweighted.\n", + "They sum to an approximation of $P(e)$.\n", + "To normalize the resulting distribution, we can divide by $P(e)$\n", + "
\n", + "Likelihood weighting wasn't the best thing for Bayesian networks, because we were not accounting for the incoming evidence so we were getting samples from the prior distribution, in some sense not the right distribution, so we might end up with a lot of particles with low weights. \n", + "These samples were very uninformative and the way we fixed it then was by using __Gibbs sampling__.\n", + "Theoretically, Gibbs sampling can be run on a HMM, but as we iterated over the process infinitely many times in a Bayesian network, we cannot do that here as we have new incoming evidence and we also need computational cycles to propagate through time.\n", + "
\n", + "A lot of samples end up with very low weight, and they are not representative of the _actual probability distribution_.\n", + "So if we keep running likelihood weighting, we keep propagating the samples with smaller weights and carry out computations for that even though these samples have no significant contribution to the actual probability distribution.\n", + "Which is why we require this last step.\n", + "\n", + "4. __Resample__:\n", + "Rather than tracking weighted samples, we _resample_.\n", + "We choose from our weighted sample distribution as many times as the number of particles we initially had and we replace these particles too, so that we have a constant number of particles.\n", + "This is equivalent to renormalizing the distribution.\n", + "The samples with low weight are rarely chosen in the new distribution after resampling.\n", + "This newer set of particles after resampling is in some sense more representative of the actual distribution and so we are better allocating our computational cycles.\n", + "Now the update is complete for this time step, continue with the next one.\n", + "\n", + "
\n", + "Let's see how this is implemented in the module." + ] + }, + { + "cell_type": "code", + "execution_count": 86, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + " Codestin Search App\n", + " \n", + " \n", + "\n", + "\n", + "

\n", + "\n", + "
def particle_filtering(e, N, HMM):\n",
+       "    """Particle filtering considering two states variables."""\n",
+       "    dist = [0.5, 0.5]\n",
+       "    # Weight Initialization\n",
+       "    w = [0 for _ in range(N)]\n",
+       "    # STEP 1\n",
+       "    # Propagate one step using transition model given prior state\n",
+       "    dist = vector_add(scalar_vector_product(dist[0], HMM.transition_model[0]),\n",
+       "                      scalar_vector_product(dist[1], HMM.transition_model[1]))\n",
+       "    # Assign state according to probability\n",
+       "    s = ['A' if probability(dist[0]) else 'B' for _ in range(N)]\n",
+       "    w_tot = 0\n",
+       "    # Calculate importance weight given evidence e\n",
+       "    for i in range(N):\n",
+       "        if s[i] == 'A':\n",
+       "            # P(U|A)*P(A)\n",
+       "            w_i = HMM.sensor_dist(e)[0] * dist[0]\n",
+       "        if s[i] == 'B':\n",
+       "            # P(U|B)*P(B)\n",
+       "            w_i = HMM.sensor_dist(e)[1] * dist[1]\n",
+       "        w[i] = w_i\n",
+       "        w_tot += w_i\n",
+       "\n",
+       "    # Normalize all the weights\n",
+       "    for i in range(N):\n",
+       "        w[i] = w[i] / w_tot\n",
+       "\n",
+       "    # Limit weights to 4 digits\n",
+       "    for i in range(N):\n",
+       "        w[i] = float("{0:.4f}".format(w[i]))\n",
+       "\n",
+       "    # STEP 2\n",
+       "\n",
+       "    s = weighted_sample_with_replacement(N, s, w)\n",
+       "\n",
+       "    return s\n",
+       "
\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "psource(particle_filtering)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here, `scalar_vector_product` and `vector_add` are helper functions to help with vector math and `weighted_sample_with_replacement` resamples from a weighted sample and replaces the original sample, as is obvious from the name.\n", + "
\n", + "This implementation considers two state variables with generic names 'A' and 'B'.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here's how we can use `particle_filtering` on our umbrella HMM, though it doesn't make much sense using particle filtering on a problem with such a small state space.\n", + "It is just to get familiar with the syntax." ] }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 87, + "metadata": {}, + "outputs": [], + "source": [ + "umbrella_transition_model = [[0.7, 0.3], [0.3, 0.7]]\n", + "umbrella_sensor_model = [[0.9, 0.2], [0.1, 0.8]]\n", + "hmm = HiddenMarkovModel(umbrella_transition_model, umbrella_sensor_model)" + ] + }, + { + "cell_type": "code", + "execution_count": 88, "metadata": { - "collapsed": true + "scrolled": false }, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['A', 'A', 'A', 'A', 'B', 'A', 'B', 'B', 'B', 'B']" + ] + }, + "execution_count": 88, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "psource(prior_sample)" + "particle_filtering(T, 10, hmm)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "The function **prior_sample** implements the algorithm described in **Figure 14.13** of the book. Nodes are sampled in the topological order. The old value of the event is passed as evidence for parent values. We will use the Bayesian Network in **Figure 14.12** to try out the **prior_sample**\n", - "\n", - "\n", - "\n", - "We store the samples on the observations. 
Let us find **P(Rain=True)**" + "We got 5 samples from state `A` and 5 samples from state `B`" ] }, { "cell_type": "code", - "execution_count": 39, - "metadata": { - "collapsed": true - }, - "outputs": [], + "execution_count": 89, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['A', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'A', 'B']" + ] + }, + "execution_count": 89, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "N = 1000\n", - "all_observations = [prior_sample(sprinkler) for x in range(N)]" + "particle_filtering([F, T, F, F, T], 10, hmm)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Now we filter to get the observations where Rain = True" + "This time we got 2 samples from state `A` and 8 samples from state `B`" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Comparing runtimes for these algorithms will not be useful, as each solves the filtering task efficiently for a different scenario.\n", + "
\n", + "`forward_backward` calculates the exact probability distribution.\n", + "
\n", + "`fixed_lag_smoothing` calculates an approximate distribution and its runtime will depend on the value of the lag chosen.\n", + "
\n", + "`particle_filtering` is an efficient method for approximating distributions for a very large or continuous state space." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## MONTE CARLO LOCALIZATION\n", + "In the domain of robotics, particle filtering is used for _robot localization_.\n", + "__Localization__ is the problem of finding out where things are, in this case, we want to find the position of a robot in a continuous state space.\n", + "
\n", + "__Monte Carlo Localization__ is an algorithm for robots to _localize_ using a _particle filter_.\n", + "Given a map of the environment, the algorithm estimates the position and orientation of a robot as it moves and senses the environment.\n", + "
\n", + "Initially, particles are distributed uniformly over the state space, i.e. the robot has no information about where it is and assumes it is equally likely to be at any point in space.\n", + "
\n", + "When the robot moves, it analyses the incoming evidence to shift and change the probability to better approximate the probability distribution of its position.\n", + "The particles are then resampled based on their weights.\n", + "
\n", + "Gradually, as more evidence comes in, the robot gets better at approximating its location and the particles converge towards the actual position of the robot.\n", + "
\n", + "The pose of a robot is defined by its two Cartesian coordinates with values $x$ and $y$ and its direction with value $\\theta$.\n", + "We use the kinematic equations of motion to model a deterministic state prediction.\n", + "This is our motion model (or transition model).\n", + "
\n", + "Next, we need a sensor model.\n", + "There can be two kinds of sensor models, the first assumes that the sensors detect _stable_, _recognizable_ features of the environment called __landmarks__.\n", + "The robot senses the location and bearing of each landmark and updates its belief according to that.\n", + "We can also assume the noise in measurements to be Gaussian, to simplify things.\n", + "
\n", + "Another kind of sensor model is used for an array of range sensors, each of which has a fixed bearing relative to the robot.\n", + "These sensors provide a set of range values in each direction.\n", + "This will also be corrupted by Gaussian noise, but we can assume that the errors for different beam directions are independent and identically distributed.\n", + "
\n", + "After evidence comes in, the robot updates its belief state and reweights the particle distribution to better approximate the actual distribution.\n", + "
\n", + "
\n", + "Let's have a look at how this algorithm is implemented in the module" ] }, { "cell_type": "code", - "execution_count": 40, - "metadata": { - "collapsed": true - }, - "outputs": [], + "execution_count": 90, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + " Codestin Search App\n", + " \n", + " \n", + "\n", + "\n", + "

\n", + "\n", + "
def monte_carlo_localization(a, z, N, P_motion_sample, P_sensor, m, S=None):\n",
+       "    """Monte Carlo localization algorithm from Fig 25.9"""\n",
+       "\n",
+       "    def ray_cast(sensor_num, kin_state, m):\n",
+       "        return m.ray_cast(sensor_num, kin_state)\n",
+       "\n",
+       "    M = len(z)\n",
+       "    W = [0]*N\n",
+       "    S_ = [0]*N\n",
+       "    W_ = [0]*N\n",
+       "    v = a['v']\n",
+       "    w = a['w']\n",
+       "\n",
+       "    if S is None:\n",
+       "        S = [m.sample() for _ in range(N)]\n",
+       "\n",
+       "    for i in range(N):\n",
+       "        S_[i] = P_motion_sample(S[i], v, w)\n",
+       "        W_[i] = 1\n",
+       "        for j in range(M):\n",
+       "            z_ = ray_cast(j, S_[i], m)\n",
+       "            W_[i] = W_[i] * P_sensor(z[j], z_)\n",
+       "\n",
+       "    S = weighted_sample_with_replacement(N, S_, W_)\n",
+       "    return S\n",
+       "
\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ - "rain_true = [observation for observation in all_observations if observation['Rain'] == True]" + "psource(monte_carlo_localization)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Finally, we can find **P(Rain=True)**" - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.508\n" - ] - } - ], - "source": [ - "answer = len(rain_true) / N\n", - "print(answer)" + "Our implementation of Monte Carlo Localization uses the range scan method.\n", + "The `ray_cast` helper function casts rays in different directions and stores the range values.\n", + "
\n", + "`a` stores the `v` and `w` components of the robot's velocity.\n", + "
\n", + "`z` is a range scan.\n", + "
\n", + "`P_motion_sample` is the motion or transition model.\n", + "
\n", + "`P_sensor` is the range sensor noise model.\n", + "
\n", + "`m` is the 2D map of the environment\n", + "
\n", + "`S` is a vector of samples of size N" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "To evaluate a conditional distribution. We can use a two-step filtering process. We first separate out the variables that are consistent with the evidence. Then for each value of query variable, we can find probabilities. For example to find **P(Cloudy=True | Rain=True)**. We have already filtered out the values consistent with our evidence in **rain_true**. Now we apply a second filtering step on **rain_true** to find **P(Rain=True and Cloudy=True)**" + "We'll now define a simple 2D map to run Monte Carlo Localization on.\n", + "
\n", + "Let's say this is the map we want\n", + "
" ] }, { "cell_type": "code", - "execution_count": 42, - "metadata": {}, + "execution_count": 91, + "metadata": { + "scrolled": true + }, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.7755905511811023\n" - ] + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAfAAAAFYCAYAAACs465lAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAEfZJREFUeJzt3XuMpXddx/HP1x0aKAWp6QL2oqVaUCRy6UhAIiqFWC5SjEZBIUUxTUShEBAKJmBiYoga1ESDWQu2iQ2gpQpeuFQE0QQrswWEsiANLe1CpVMJF5FYCl//mLMwDjs72znPzpnf8HolmzmXZ87zfWZn5j3Pc848U90dAGAs37boAQCAu07AAWBAAg4AAxJwABiQgAPAgAQcAAYk4AAwIAGHXaiqbqqqx2+47dlV9S8TPHZX1ffO+zjAYgk4AAxIwGFAVXV6Vb2pqlar6saqev66+x5ZVe+tqs9V1a1V9UdVddLsvvfMFvtgVf13Vf1cVf1YVR2uqpdU1W2z93laVT2pqv6jqj5bVS8/nsef3d9V9fyq+kRV3V5Vv1tVvtfAxHxRwWBmMfybJB9MckaS85O8oKp+YrbIV5O8MMlpSR49u/+5SdLdj50t89DuPqW73zi7fv8kd5893iuS/GmSZyY5L8mPJHlFVZ2z1eOv81NJlpM8IsmFSX5pim0HvqGcCx12n6q6KWuBvHPdzScluS7Ji5L8ZXd/17rlX5bkgd39i0d5rBck+dHu/qnZ9U5ybnffMLv+Y0nemuSU7v5qVd0ryReSPKq7r50tczDJb3X3Xx/n4z+xu982u/7cJD/d3efP8SEBNlha9ADApp7W3f9w5EpVPTvJLyf57iSnV9Xn1i27L8k/z5Z7YJJXZ20P+OSsfZ0f3GJd/9XdX51d/vLs7WfW3f/lJKfchce/Zd3lTyY5fYv1A3eRQ+gwnluS3Njd91n3717d/aTZ/a9J8tGs7WXfO8nLk9SE6z+exz9r3eXvSvLpCdcPRMBhRP+W5AtV9dKqukdV7auqh1TVD83uP3II/L+r6vuS/MqG9/9MknOyfVs9fpL8elWdWlVnJbkkyRuPsgwwBwGHwcwOdf9kkocluTHJ7UkuS/Lts0VenOTnk3wxay9G2xjP30xyxexV5D+7jRG2evwkeXPWDqt/IMnfJXntNtYDHIMXsQGT2vgiOeDEsAcOAAMScAAYkEPoADAge+AAMCABB4AB7eiZ2E477bQ+++yzd3KVwB5w8OBWJ5JjK+edd96iRzghdvJzY6c+hjfddFNuv/32LU++tKPPgS8vL/fKysqOrQ/YG6qmPJHct6a9+nqnnfzc2KmP4fLyclZWVrbcMIfQAWBAAg4AAxJwABiQgAPAgAQcAAYk4AAwIAEHgAEJOAAMaK6AV9UFVfWxqrqhqi6daigA4Ni2HfCq2pfkj5M8McmDkzyjqh481WAAwObm2QN/ZJIbuvsT3X1HkjckuXCasQCAY5kn4GckuWXd9cOz2/6fqrq4qlaqamV1dXWO1QEAR8wT8KOdaP2bzvTe3Qe6e7m7l/fv3z/H6gCAI+YJ+OEkZ627fmaST883DgBwPOYJ+PuSnFtVD6iqk5I8PclbphkLADiWpe2+Y3ffWVW/luTtSfYleV13Xz/ZZADAprYd8CTp7r9P8vcTzQIAHCdnYgOAAQk4AAxIwAFgQAIOAAMScAAYkIADwIAEHAAGNNf
vgQPARlVH+1MZTM0eOAAMSMABYEACDgADEnAAGJCAA8CABBwABiTgADAgAQeAAQk4AAxIwAFgQAIOAAMScAAYkIADwIAEHAAGJOAAMCABB4ABCTgADEjAAWBAAg4AAxJwABiQgAPAgAQcAAYk4AAwIAEHgAEJOAAMSMABYEBLO7mygwcPpqp2cpXwLaO7Fz0CsIPsgQPAgAQcAAYk4AAwIAEHgAEJOAAMSMABYEACDgADEnAAGJCAA8CABBwABrTtgFfVWVX1rqo6VFXXV9UlUw4GAGxunnOh35nkRd19XVXdK8nBqrqmuz8y0WwAwCa2vQfe3bd293Wzy19McijJGVMNBgBsbpK/RlZVZyd5eJJrj3LfxUkunmI9AMCauQNeVackeVOSF3T3Fzbe390HkhyYLevvHQLABOZ6FXpV3S1r8b6yu6+eZiQAYCvzvAq9krw2yaHufvV0IwEAW5lnD/wxSZ6V5HFV9YHZvydNNBcAcAzbfg68u/8lSU04CwBwnJyJDQAGJOAAMCABB4ABCTgADEjAAWBAAg4AAxJwABjQJH/M5Hidd955WVlZ2clVAsCeZA8cAAYk4AAwIAEHgAEJOAAMSMABYEACDgADEnAAGJCAA8CABBwABiTgADAgAQeAAQk4AAxIwAFgQAIOAAMScAAYkIADwIAEHAAGJOAAMCABB4ABCTgADEjAAWBAAg4AAxJwABiQgAPAgAQcAAYk4AAwoKVFD3CiVNWiRwCAE8YeOAAMSMABYEACDgADEnAAGJCAA8CABBwABiTgADAgAQeAAQk4AAxIwAFgQHMHvKr2VdX7q+pvpxgIANjaFHvglyQ5NMHjAADHaa6AV9WZSZ6c5LJpxgEAjse8e+B/kOQlSb622QJVdXFVrVTVyurq6pyrAwCSOQJeVU9Jclt3HzzWct19oLuXu3t5//79210dALDOPHvgj0ny1Kq6Kckbkjyuqv58kqkAgGPadsC7+2XdfWZ3n53k6Un+sbufOdlkAMCm/B44AAxoaYoH6e53J3n3FI8FAGzNHjgADEjAAWBAAg4AAxJwABiQgAPAgAQcAAYk4AAwoEl+D3w36u5FjwBMpKoWPQLsOvbAAWBAAg4AAxJwABiQgAPAgAQcAAYk4AAwIAEHgAEJOAAMSMABYEACDgADEnAAGJCAA8CABBwABiTgADAgAQeAAQk4AAxIwAFgQAIOAAMScAAYkIADwIAEHAAGJOAAMCABB4ABCTgADEjAAWBAAg4AAxJwABiQgAPAgAQcAAYk4AAwIAEHgAEJOAAMSMABYEACDgADEnAAGJCAA8CABBwABjRXwKvqPlV1VVV9tKoOVdWjpxoMANjc0pzv/4dJ3tbdP1NVJyU5eYKZAIAtbDvgVXXvJI9N8uwk6e47ktwxzVgAwLHMcwj9nCSrSf6sqt5fVZdV1T03LlRVF1fVSlWtrK6uzrE6AOCIeQK+lOQRSV7T3Q9P8qUkl25cqLsPdPdydy/v379/jtUBAEfME/DDSQ5397Wz61dlLegAwAm27YB3938muaWqHjS76fwkH5lkKgDgmOZ9Ffrzklw5ewX6J5L84vwjAQBbmSvg3f2BJMsTzQIAHCdnYgOAAQk4AAxIwAFgQAIOAAMScAAYkIADwIAEHAAGJOAAMKB5z8RGkqpa9AjsUt296BGAPcoeOAAMSMABYEACDgADEnAAGJCAA8CABBwABiTgADAgAQeAAQk4AAxIwAFgQAIOAAMScAAYkIADwIAEHAAGJOAAMCABB4ABCTgADEjAAWBAAg4AAxJwABiQgAPAgAQcAAYk4AAwIAEHgAEJOAAMSMABYEBLix4AYCvdvegRuAt28v+rqnZsXbuNPXAAGJCAA8CABBwABiTgADAgAQeAAQk4AAxIwAFgQAIOAAMScAAY0FwBr6oXVtX1VfXhqnp9Vd19qsEAgM1tO+BVdUaS5ydZ7u6HJNmX5OlTDQYAbG7eQ+hLSe5RVUtJTk7y6flHAgC
2su2Ad/enkvxekpuT3Jrk8939jo3LVdXFVbVSVSurq6vbnxQA+Lp5DqGfmuTCJA9IcnqSe1bVMzcu190Hunu5u5f379+//UkBgK+b5xD645Pc2N2r3f2VJFcn+eFpxgIAjmWegN+c5FFVdXKt/UHW85McmmYsAOBY5nkO/NokVyW5LsmHZo91YKK5AIBjWJrnnbv7lUleOdEsAMBxciY2ABiQgAPAgAQcAAYk4AAwIAEHgAEJOAAMSMABYEACDgADmutELqzp7kWPALBrrJ1dmxPNHjgADEjAAWBAAg4AAxJwABiQgAPAgAQcAAYk4AAwIAEHgAEJOAAMSMABYEACDgADEnAAGJCAA8CABBwABiTgADAgAQeAAQk4AAxIwAFgQAIOAAMScAAYkIADwIAEHAAGJOAAMCABB4ABCTgADEjAAWBAS4seYC+oqkWPwC7V3YseYU/wNTa/nfxc3Ml1fSt/btgDB4ABCTgADEjAAWBAAg4AAxJwABiQgAPAgAQcAAYk4AAwIAEHgAFtGfCqel1V3VZVH15323dU1TVV9fHZ21NP7JgAwHrHswd+eZILNtx2aZJ3dve5Sd45uw4A7JAtA97d70ny2Q03X5jkitnlK5I8beK5AIBj2O5z4Pfr7luTZPb2vpstWFUXV9VKVa2srq5uc3UAwHon/EVs3X2gu5e7e3n//v0nenUA8C1huwH/TFV9Z5LM3t423UgAwFa2G/C3JLlodvmiJG+eZhwA4Hgcz6+RvT7Je5M8qKoOV9VzkrwqyROq6uNJnjC7DgDskKWtFujuZ2xy1/kTzwIAHCdnYgOAAQk4AAxIwAFgQAIOAAMScAAYkIADwIAEHAAGJOAAMKDq7p1bWdVqkk/exXc7LcntJ2CcRbNdY7FdY9mr25Xs3W2zXd/w3d295V//2tGAb0dVrXT38qLnmJrtGovtGste3a5k726b7brrHEIHgAEJOAAMaISAH1j0ACeI7RqL7RrLXt2uZO9um+26i3b9c+AAwDcbYQ8cANhgVwe8qi6oqo9V1Q1Vdemi55lCVZ1VVe+qqkNVdX1VXbLomaZUVfuq6v1V9beLnmUqVXWfqrqqqj46+3979KJnmkJVvXD2Ofjhqnp9Vd190TNtR1W9rqpuq6oPr7vtO6rqmqr6+OztqYuccTs22a7fnX0e/ntV/VVV3WeRM27H0bZr3X0vrqquqtMWMds8NtuuqnrerGPXV9XvTLnOXRvwqtqX5I+TPDHJg5M8o6oevNipJnFnkhd19/cneVSSX90j23XEJUkOLXqIif1hkrd19/cleWj2wPZV1RlJnp9kubsfkmRfkqcvdqptuzzJBRtuuzTJO7v73CTvnF0fzeX55u26JslDuvsHk/xHkpft9FATuDzfvF2pqrOSPCHJzTs90EQuz4btqqofT3Jhkh/s7h9I8ntTrnDXBjzJI5Pc0N2f6O47krwhax+IoXX3rd193ezyF7MWgzMWO9U0qurMJE9OctmiZ5lKVd07yWOTvDZJuvuO7v7cYqeazFKSe1TVUpKTk3x6wfNsS3e/J8lnN9x8YZIrZpevSPK0HR1qAkfbru5+R3ffObv6r0nO3PHB5rTJ/1eS/H6SlyQZ8oVZm2zXryR5VXf/72yZ26Zc524O+BlJbll3/XD2SOiOqKqzkzw8ybWLnWQyf5C1L8CvLXqQCZ2TZDXJn82eGrisqu656KHm1d2fytrewM1Jbk3y+e5+x2KnmtT9uvvWZO2H5iT3XfA8J8IvJXnrooeYQlU9NcmnuvuDi55lYg9M8iNVdW1V/VNV/dCUD76bA15HuW3In8yOpqpOSfKmJC/o7i8sep55VdVTktzW3QcXPcvElpI8IslruvvhSb6UMQ/H/j+z54QvTPKAJKcnuWdVPXOxU3G8quo3svZ03JWLnmVeVXVykt9I8opFz3ICLCU5NWtPl/56kr+oqqO1bVt2c8APJzlr3fUzM+ghvo2q6m5Zi/eV3X31oueZyGO
SPLWqbsra0x2Pq6o/X+xIkzic5HB3HzlKclXWgj66xye5sbtXu/srSa5O8sMLnmlKn6mq70yS2dtJD10uUlVdlOQpSX6h98bvAX9P1n6Q/ODs+8eZSa6rqvsvdKppHE5yda/5t6wdnZzsBXq7OeDvS3JuVT2gqk7K2gts3rLgmeY2++nrtUkOdferFz3PVLr7Zd19ZnefnbX/q3/s7uH36Lr7P5PcUlUPmt10fpKPLHCkqdyc5FFVdfLsc/L87IEX563zliQXzS5flOTNC5xlMlV1QZKXJnlqd//PoueZQnd/qLvv291nz75/HE7yiNnX3uj+OsnjkqSqHpjkpEz4B1t2bcBnL9T4tSRvz9o3lr/o7usXO9UkHpPkWVnbQ/3A7N+TFj0Ux/S8JFdW1b8neViS317wPHObHVG4Ksl1ST6Ute8FQ54Jq6pen+S9SR5UVYer6jlJXpXkCVX18ay9svlVi5xxOzbZrj9Kcq8k18y+d/zJQofchk22a3ibbNfrkpwz+9WyNyS5aMqjJs7EBgAD2rV74ADA5gQcAAYk4AAwIAEHgAEJOAAMSMABYEACDgADEnAAGND/Adcj4cKAmSYuAAAAAElFTkSuQmCC\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" } ], "source": [ - "rain_and_cloudy = [observation for observation in rain_true if observation['Cloudy'] == True]\n", - "answer = len(rain_and_cloudy) / len(rain_true)\n", - "print(answer)" + "m = MCLmap([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0],\n", + " [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0],\n", + " [1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0],\n", + " [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0],\n", + " [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0],\n", + " [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0],\n", + " [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0],\n", + " [0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0],\n", + " [0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0],\n", + " [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0],\n", + " [0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0]])\n", + "\n", + "heatmap(m.m, cmap='binary')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Rejection Sampling\n", - "\n", - "Rejection Sampling is based on an idea similar to what we did just now. First, it generates samples from the prior distribution specified by the network. Then, it rejects all those that do not match the evidence. 
The function **rejection_sampling** implements the algorithm described by **Figure 14.14**" + "Let's define the motion model as a function `P_motion_sample`." ] }, { "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, + "execution_count": 92, + "metadata": {}, "outputs": [], "source": [ - "psource(rejection_sampling)" + "def P_motion_sample(kin_state, v, w):\n", + " \"\"\"Sample from possible kinematic states.\n", + " Returns from a single element distribution (no uncertainity in motion)\"\"\"\n", + " pos = kin_state[:2]\n", + " orient = kin_state[2]\n", + "\n", + " # for simplicity the robot first rotates and then moves\n", + " orient = (orient + w)%4\n", + " for _ in range(orient):\n", + " v = (v[1], -v[0])\n", + " pos = vector_add(pos, v)\n", + " return pos + (orient,)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "The function keeps counts of each of the possible values of the Query variable and increases the count when we see an observation consistent with the evidence. It takes in input parameters **X** - The Query Variable, **e** - evidence, **bn** - Bayes net and **N** - number of prior samples to generate.\n", - "\n", - "**consistent_with** is used to check consistency." + "Define the sensor model as a function `P_sensor`." ] }, { "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, + "execution_count": 93, + "metadata": {}, "outputs": [], "source": [ - "psource(consistent_with)" + "def P_sensor(x, y):\n", + " \"\"\"Conditional probability for sensor reading\"\"\"\n", + " # Need not be exact probability. Can use a scaled value.\n", + " if x == y:\n", + " return 0.8\n", + " elif abs(x - y) <= 2:\n", + " return 0.05\n", + " else:\n", + " return 0" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "To answer **P(Cloudy=True | Rain=True)**" + "Initializing variables." 
] }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 94, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0.7835249042145593" - ] - }, - "execution_count": 43, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "p = rejection_sampling('Cloudy', dict(Rain=True), sprinkler, 1000)\n", - "p[True]" + "a = {'v': (0, 0), 'w': 0}\n", + "z = (2, 4, 1, 6)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Likelihood Weighting\n", - "\n", - "Rejection sampling tends to reject a lot of samples if our evidence consists of a large number of variables. Likelihood Weighting solves this by fixing the evidence (i.e. not sampling it) and then using weights to make sure that our overall sampling is still consistent.\n", - "\n", - "The pseudocode in **Figure 14.15** is implemented as **likelihood_weighting** and **weighted_sample**." + "Let's run `monte_carlo_localization` with these parameters to find a sample distribution S." ] }, { "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, + "execution_count": 95, + "metadata": {}, "outputs": [], "source": [ - "psource(weighted_sample)" + "S = monte_carlo_localization(a, z, 1000, P_motion_sample, P_sensor, m)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "\n", - "**weighted_sample** samples an event from Bayesian Network that's consistent with the evidence **e** and returns the event and its weight, the likelihood that the event accords to the evidence. It takes in two parameters **bn** the Bayesian Network and **e** the evidence.\n", - "\n", - "The weight is obtained by multiplying **P(xi | parents(xi))** for each node in evidence. We set the values of **event = evidence** at the start of the function." + "Let's plot the values in the sample distribution `S`." 
] }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 96, "metadata": {}, "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "GRID:\n", + " 0 0 9 41 123 12 1 0 0 0 0 0 0 0 0 0 0\n", + " 0 0 0 0 2 107 56 4 0 0 0 0 0 0 0 0 0\n", + " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 0 0 0 5 4 9 2 0 0 0 0 0 0 0 0 0 0\n", + " 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 0 0 10 260 135 5 0 0 0 0 0 0 0 0 0 0 0\n", + " 0 0 0 0 5 34 50 0 0 0 0 0 0 0 0 0 0\n", + "79 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "26 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0\n", + " 0 0 0 3 2 10 0 0 0 0 0 0 0 0 0 0 0\n", + " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n" + ] + }, { "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAfAAAAFYCAYAAACs465lAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAEqpJREFUeJzt3X+w5Xdd3/HXe3eT5heYmA1okoWQNoCUUUkvlB+VWgLTgEhg2mmhDRPQTma0QGBQDNpBO850mOpQndHBiQGTGTOgDSngLySiljJDo5sAQliUDInJQiS7ixhEbFjy7h/3rF6XvXt37/nuOfu5eTxmdu758b3n8/7u/fG833POPbe6OwDAWLYtewAA4PgJOAAMSMABYEACDgADEnAAGJCAA8CABBwABiTgcBKqqnuq6vmHXfaqqvrIBLfdVfVP5r0dYLkEHAAGJOAwoKo6v6reU1X7quruqnrdmuueUVUfraovV9X9VfULVXXq7LoPzzb7RFX9dVX9+6r63qraW1VvqqoHZu/z0qp6UVX9WVV9qap+/Fhuf3Z9V9XrqupzVbW/qn6mqnyvgYn5ooLBzGL4G0k+keSCJJcleX1V/evZJt9I8oYkO5M8a3b9DydJdz93ts13dfdZ3f1rs/PfluS02e29JckvJ7kyyT9L8j1J3lJVF290+2u8LMlKkkuTXJHkB6bYd+DvlddCh5NPVd2T1UAeXHPxqUnuSPLGJP+zux+3Zvs3J3lid7/6CLf1+iT/srtfNjvfSS7p7rtm5783ye8kOau7v1FVj0ryYJJndvdts21uT/LT3f3eY7z9F3b3B2bnfzjJv+nuy+b4LwEOs2PZAwDreml3/96hM1X1qiT/Kcnjk5xfVV9es+32JP9ntt0Tk7wtq0fAZ2T16/z2DdY60N3fmJ3+2uztF9dc/7UkZx3H7d+35vSfJzl/g/WB4+QudBjPfUnu7u6z1/x7VHe/aHb925N8JqtH2Y9O8uNJasL1j+X2d605/bgkX5hwfSACDiP6oyQPVtWPVdXpVbW9qp5aVU+fXX/oLvC/rqonJ/mhw97/i0kuzuZtdPtJ8qNVdU5V7UpyTZJfO8I2wBwEHAYzu6v7+5N8d5K7k+xPcn2Sb5lt8iNJ/kOSr2T1yWiHx/Onktw4exb5v9vECBvdfpK8L6t3q388yW8leccm1gGOwpPYgEkd/iQ54MRwBA4AAxJwABiQu9ABYECOwAFgQAIOAANa6Cux7dx5bl/0uF0bbziafnhxa33jocWt9eUFvvbGuRctbi1/
VwM4id1z733Zv//Ahi++tNCAX/S4Xdn9kd/beMPB9MG/Xdxaf3n3wtbKb/z0wpaqK395cWudeubC1gI4Xiv/4vnHtJ1DEQAYkIADwIAEHAAGJOAAMCABB4ABCTgADEjAAWBAAg4AA5or4FV1eVX9aVXdVVXXTjUUAHB0mw54VW1P8otJXpjkKUleUVVPmWowAGB98xyBPyPJXd39ue5+KMm7k1wxzVgAwNHME/ALkty35vze2WX/QFVdXVW7q2r3vv0H5lgOADhknoAf6S+l9Ddd0H1dd69098p5O8+dYzkA4JB5Ar43ydq/DXphkgX+/UkAeOSaJ+B/nOSSqnpCVZ2a5OVJ3j/NWADA0Wz674F398Gqek2S302yPck7u/vOySYDANa16YAnSXf/dpLfnmgWAOAYeSU2ABiQgAPAgAQcAAYk4AAwIAEHgAEJOAAMSMABYEBz/R44q2rHaYtb67zvWNha/eqbFrfWe9+4uLUufdnC1qpdz17cWtu2L2wtYPkcgQPAgAQcAAYk4AAwIAEHgAEJOAAMSMABYEACDgADEnAAGJCAA8CABBwABiTgADAgAQeAAQk4AAxIwAFgQAIOAAMScAAYkIADwIAEHAAGJOAAMCABB4ABCTgADEjAAWBAAg4AAxJwABiQgAPAgAQcAAa0Y9kDcPKqqsWt9bK3LWwtgK3AETgADEjAAWBAAg4AAxJwABiQgAPAgAQcAAYk4AAwIAEHgAEJOAAMSMABYECbDnhV7aqqP6iqPVV1Z1VdM+VgAMD65nkt9INJ3tjdd1TVo5LcXlW3dvenJ5oNAFjHpo/Au/v+7r5jdvorSfYkuWCqwQCA9U3yGHhVXZTkaUluO8J1V1fV7qravW//gSmWA4BHvLkDXlVnJXlPktd394OHX9/d13X3SnevnLfz3HmXAwAyZ8Cr6pSsxvum7r5lmpEAgI3M8yz0SvKOJHu6+23TjQQAbGSeI/DnJHllkudV1cdn/1400VwAwFFs+tfIuvsjSWrCWQCAY+SV2ABgQAIOAAMScAAYkIADwIAEHAAGJOAAMCABB4ABzfPnRFmCfvjgAldb4K/5P/z1xa21/R8tbKnVFywEmJ4jcAAYkIADwIAEHAAGJOAAMCABB4ABCTgADEjAAWBAAg4AAxJwABiQgAPAgAQcAAYk4AAwIAEHgAEJOAAMSMABYEACDgADEnAAGJCAA8CABBwABiTgADAgAQeAAQk4AAxIwAFgQAIOAAMScAAYkIADwIB2LHsAjk9t26Ifsm3blz0BwFAcgQPAgAQcAAYk4AAwIAEHgAEJOAAMSMABYEACDgADEnAAGJCAA8CABBwABjR3wKtqe1V9rKp+c4qBAICNTXEEfk2SPRPcDgBwjOYKeFVdmOT7klw/zTgAwLGY9wj855K8KcnD621QVVdX1e6q2r1v/4E5lwMAkjkCXlUvTvJAd99+tO26+7ruXunulfN2nrvZ5QCANeY5An9OkpdU1T1J3p3keVX1q5NMBQAc1aYD3t1v7u4Lu/uiJC9P8vvdfeVkkwEA6/J74AAwoB1T3Eh3/2GSP5zitgCAjTkCB4ABCTgADEjAAWBAAg4AAxJwABiQgAPAgAQcAAY0ye+BP9L117+2sLX+6z+/eGFr/ZdXXbqwtba/5n0LW6u2+bQHxucIHAAGJOAAMCABB4ABCTgADEjAAWBAAg4AAxJwABiQgAPAgAQcAAYk4AAwIAEHgAEJOAAMSMABYEACDgADEnAAGJCAA8CABBwABiTgADAgAQeAAQk4AAxIwAFgQAIOAAMScAAYkIADwIAEHAAGJOAAMKAdyx5gK6hTTl/YWj91x/0LW6sfPri4tR78/OLW+uoDC1tr2wVPX9hawCOLI3AAGJCAA8CABBwABiTgADAgAQeAAQk4AAxIwAFgQAIOAAMScAAYkIADwIDmCnhVnV1VN1fVZ6pqT1U9a6rBAID1zfta6D+f5APd/W+r6tQkZ0wwEwCwgU0HvKoe
neS5SV6VJN39UJKHphkLADiaee5CvzjJviS/UlUfq6rrq+rMwzeqqqurandV7d63/8AcywEAh8wT8B1JLk3y9u5+WpKvJrn28I26+7ruXunulfN2njvHcgDAIfMEfG+Svd192+z8zVkNOgBwgm064N39F0nuq6onzS66LMmnJ5kKADiqeZ+F/tokN82egf65JK+efyQAYCNzBby7P55kZaJZAIBj5JXYAGBAAg4AAxJwABiQgAPAgAQcAAYk4AAwIAEHgAEJOAAMaN5XYjs+f3MgD+++YSFL1aWvXMg6SVLbti9srUWqbYv79KizH7+wtbLItQBOEEfgADAgAQeAAQk4AAxIwAFgQAIOAAMScAAYkIADwIAEHAAGJOAAMCABB4ABCTgADEjAAWBAAg4AAxJwABiQgAPAgAQcAAYk4AAwIAEHgAEJOAAMSMABYEACDgADEnAAGJCAA8CABBwABiTgADAgAQeAAe1Y6GqnPTr15BcuZKnatn0h6wDAMjgCB4ABCTgADEjAAWBAAg4AAxJwABiQgAPAgAQcAAYk4AAwIAEHgAHNFfCqekNV3VlVn6qqd1XVaVMNBgCsb9MBr6oLkrwuyUp3PzXJ9iQvn2owAGB9896FviPJ6VW1I8kZSb4w/0gAwEY2HfDu/nySn01yb5L7k/xVd3/w8O2q6uqq2l1Vu/cd+MvNTwoA/J157kI/J8kVSZ6Q5PwkZ1bVlYdv193XdfdKd6+cd+45m58UAPg789yF/vwkd3f3vu7+epJbkjx7mrEAgKOZJ+D3JnlmVZ1RVZXksiR7phkLADiaeR4Dvy3JzUnuSPLJ2W1dN9FcAMBR7Jjnnbv7J5P85ESzAADHyCuxAcCABBwABiTgADAgAQeAAQk4AAxIwAFgQAIOAAMScAAY0Fwv5HLctp2SOuuxC11yq+l+eIGr1eKWOvi3C1uqTjl9YWsBnCiOwAFgQAIOAAMScAAYkIADwIAEHAAGJOAAMCABB4ABCTgADEjAAWBAAg4AAxJwABiQgAPAgAQcAAYk4AAwIAEHgAEJOAAMSMABYEACDgADEnAAGJCAA8CABBwABiTgADAgAQeAAQk4AAxIwAFgQAIOAAPasewBOD5VW/RnrlNOX/YEAEPZojUAgK1NwAFgQAIOAAMScAAYkIADwIAEHAAGJOAAMCABB4ABCTgADGjDgFfVO6vqgar61JrLvrWqbq2qz87ennNixwQA1jqWI/Abklx+2GXXJvlQd1+S5EOz8wDAgmwY8O7+cJIvHXbxFUlunJ2+MclLJ54LADiKzT4G/tjuvj9JZm8fs96GVXV1Ve2uqt379h/Y5HIAwFon/Els3X1dd69098p5O8890csBwCPCZgP+xar69iSZvX1gupEAgI1sNuDvT3LV7PRVSd43zTgAwLE4ll8je1eSjyZ5UlXtraofTPLWJC+oqs8mecHsPACwIDs22qC7X7HOVZdNPAsAcIy8EhsADEjAAWBAAg4AAxJwABiQgAPAgAQcAAYk4AAwIAEHgAFVdy9usap9Sf78ON9tZ5L9J2CcZbNfY7FfY9mq+5Vs3X2zX3/v8d193kYbLTTgm1FVu7t7ZdlzTM1+jcV+jWWr7leydffNfh0/d6EDwIAEHAAGNELAr1v2ACeI/RqL/RrLVt2vZOvum/06Tif9Y+AAwDcb4QgcADjMSR3wqrq8qv60qu6qqmuXPc8UqmpXVf1BVe2pqjur6pplzzSlqtpeVR+rqt9c9ixTqaqzq+rmqvrM7OP2rGXPNIWqesPsc/BTVfWuqjpt2TNtRlW9s6oeqKpPrbnsW6vq1qr67OztOcuccTPW2a+fmX0e/klV/a+qOnuZM27GkfZrzXU/UlVdVTuXMds81tuvqnrtrGN3VtV/n3LNkzbgVbU9yS8meWGSpyR5RVU9ZblTTeJgkjd293ckeWaS/7xF9uuQa5LsWfYQE/v5JB/o7icn+a5sgf2rqguSvC7JSnc/Ncn2JC9f7lSb
dkOSyw+77NokH+ruS5J8aHZ+NDfkm/fr1iRP7e7vTPJnSd686KEmcEO+eb9SVbuSvCDJvYseaCI35LD9qqp/leSKJN/Z3f80yc9OueBJG/Akz0hyV3d/rrsfSvLurP5HDK277+/uO2anv5LVGFyw3KmmUVUXJvm+JNcve5apVNWjkzw3yTuSpLsf6u4vL3eqyexIcnpV7UhyRpIvLHmeTenuDyf50mEXX5HkxtnpG5O8dKFDTeBI+9XdH+zug7Oz/zfJhQsfbE7rfLyS5H8keVOSIZ+Ytc5+/VCSt3b3/5tt88CUa57MAb8gyX1rzu/NFgndIVV1UZKnJbltuZNM5uey+gX48LIHmdDFSfYl+ZXZQwPXV9WZyx5qXt39+aweDdyb5P4kf9XdH1zuVJN6bHffn6z+0JzkMUue50T4gSS/s+whplBVL0ny+e7+xLJnmdgTk3xPVd1WVf+7qp4+5Y2fzAGvI1w25E9mR1JVZyV5T5LXd/eDy55nXlX14iQPdPfty55lYjuSXJrk7d39tCRfzZh3x/4Ds8eEr0jyhCTnJzmzqq5c7lQcq6r6iaw+HHfTsmeZV1WdkeQnkrxl2bOcADuSnJPVh0t/NMmvV9WR2rYpJ3PA9ybZteb8hRn0Lr7DVdUpWY33Td19y7Lnmchzkrykqu7J6sMdz6uqX13uSJPYm2Rvdx+6l+TmrAZ9dM9Pcnd37+vurye5JcmzlzzTlL5YVd+eJLO3k951uUxVdVWSFyf5j701fg/4H2f1B8lPzL5/XJjkjqr6tqVONY29SW7pVX+U1XsnJ3uC3skc8D9OcklVPaGqTs3qE2zev+SZ5jb76esdSfZ099uWPc9UuvvN3X1hd1+U1Y/V73f38Ed03f0XSe6rqifNLrosyaeXONJU7k3yzKo6Y/Y5eVm2wJPz1nh/kqtmp69K8r4lzjKZqro8yY8leUl3/82y55lCd3+yux/T3RfNvn/sTXLp7GtvdO9N8rwkqaonJjk1E/7BlpM24LMnarwmye9m9RvLr3f3ncudahLPSfLKrB6hfnz270XLHoqjem2Sm6rqT5J8d5L/tuR55ja7R+HmJHck+WRWvxcM+UpYVfWuJB9N8qSq2ltVP5jkrUleUFWfzeozm9+6zBk3Y539+oUkj0py6+x7xy8tdchNWGe/hrfOfr0zycWzXy17d5KrprzXxCuxAcCATtojcABgfQIOAAMScAAYkIADwIAEHAAGJOAAMCABB4ABCTgADOj/A0dU7lEBXyEDAAAAAElFTkSuQmCC\n", "text/plain": [ - "({'Cloudy': True, 'Rain': True, 'Sprinkler': False, 'WetGrass': True}, 0.8)" + "" ] }, - "execution_count": 44, "metadata": {}, - "output_type": "execute_result" + "output_type": "display_data" } ], "source": [ - "weighted_sample(sprinkler, dict(Rain=True))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "psource(likelihood_weighting)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**likelihood_weighting** implements the algorithm to solve our inference problem. 
The code is similar to **rejection_sampling** but instead of adding one for each sample we add the weight obtained from **weighted_sampling**." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "likelihood_weighting('Cloudy', dict(Rain=True), sprinkler, 200).show_approx()" + "grid = [[0]*17 for _ in range(11)]\n", + "for x, y, _ in S:\n", + " if 0 <= x < 11 and 0 <= y < 17:\n", + " grid[x][y] += 1\n", + "print(\"GRID:\")\n", + "print_table(grid)\n", + "heatmap(grid, cmap='Oranges')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Gibbs Sampling\n", - "\n", - "In likelihood sampling, it is possible to obtain low weights in cases where the evidence variables reside at the bottom of the Bayesian Network. This can happen because influence only propagates downwards in likelihood sampling.\n", - "\n", - "Gibbs Sampling solves this. The implementation of **Figure 14.16** is provided in the function **gibbs_ask** " - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "psource(gibbs_ask)" + "The distribution is highly concentrated at `(5, 3)`, but the robot is not very confident about its position as some other cells also have high probability values." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "In **gibbs_ask** we initialize the non-evidence variables to random values. And then select non-evidence variables and sample it from **P(Variable | value in the current state of all remaining vars) ** repeatedly sample. In practice, we speed this up by using **markov_blanket_sample** instead. This works because terms not involving the variable get canceled in the calculation. The arguments for **gibbs_ask** are similar to **likelihood_weighting**" + "Let's look at another scenario." 
] }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 97, "metadata": {}, "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "GRID:\n", + "0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0\n", + "0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "0 0 0 0 0 0 0 999 0 0 0 0 0 0 0 0 0\n", + "0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n" + ] + }, { "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAfAAAAFYCAYAAACs465lAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAEW5JREFUeJzt3X+s7wdd3/HXe702UAqj9halP7B0FhwjKt2VgEzmKGQFGcVs2WDDFHVpohMKQbFogiRLFjIN00TD0hVsExtQSyfMKVJRx0hY9baAUIpCaG0vVHpvCYLODMH3/jjf6vHSc8/t+X56v/d9eTySk/P98Tmfz/tz7znneT6f7/d8T3V3AIBZ/t6mBwAAHjoBB4CBBBwABhJwABhIwAFgIAEHgIEEHAAGEnA4CVXVXVX13KNue3lVvX+BdXdVffO66wE2S8ABYCABh4Gq6tyqekdVHa6qO6vqldvue3pVfaCqPl9V91bVz1fV6av73rda7MNV9edV9W+q6rur6lBVvbaq7lt9zIur6gVV9cdV9bmq+onjWf/q/q6qV1bVp6rqSFX9dFX5XgML80UFw6xi+D+SfDjJeUkuTfKqqvrnq0W+kuTVSfYneebq/h9Oku5+9mqZb+vuM7v7l1fXvzHJI1bre32S/5bkZUn+cZLvSvL6qrpot/Vv871JDiS5JMnlSX5giX0H/lZ5LXQ4+VTVXdkK5Je33Xx6ktuSvCbJr3b3E7Yt/7okT+ru73+Qdb0qyT/t7u9dXe8kF3f3J1fXvzvJbyY5s7u/UlWPTvKFJM/o7ltWy9ya5D92968d5/qf393vXl3/4ST/srsvXeOfBDjKvk0PAOzoxd392w9cqaqXJ/n3Sb4pyblV9flty56W5H+vlntSkjdl6wj4jGx9nd+6y7bu7+6vrC7/5er9Z7fd/5dJznwI679n2+U/SXLuLtsHHiKn0GGee5Lc2d2P3fb26O5+wer+Nyf5eLaOsh+T5CeS1ILbP571X7Dt8hOSfGbB7QMRcJjo95N8oap+vKoeWVWnVdVTq+o7Vvc/cAr8z6vqW5L80FEf/9kkF2Xvdlt/kvxYVZ1VVRckuSrJLz/IMsAaBByGWZ3q/hdJvj3JnUmOJLk2yd9fLfKjSf5tki9m68loR8fzDUmuXz2L/F/vYYTd1p8k78zWafUPJfmfSd6yh+0Ax+BJbMCijn6SHPDwcAQOAAMJOAAM5BQ6AAzkCBwABhJwABjohL4S2/79Z/eFT7hg9wUB4GvUXXffkyNH7t/1xZdOaMAvfMIFOfj+3959QQD4GnXgnzz3uJZz
Ch0ABhJwABhIwAFgIAEHgIEEHAAGEnAAGEjAAWAgAQeAgdYKeFVdVlV/VFWfrKqrlxoKADi2PQe8qk5L8gtJnp/kKUleWlVPWWowAGBn6xyBPz3JJ7v7U939pSRvT3L5MmMBAMeyTsDPS3LPtuuHVrf9HVV1ZVUdrKqDh4/cv8bmAIAHrBPwB/tLKf1VN3Rf090HuvvAOfvPXmNzAMAD1gn4oSTb/zbo+Uk+s944AMDxWCfgf5Dk4qp6YlWdnuQlSd61zFgAwLHs+e+Bd/eXq+pHkvxWktOSvLW7b19sMgBgR3sOeJJ0928k+Y2FZgEAjpNXYgOAgQQcAAYScAAYSMABYCABB4CBBBwABhJwABhIwAFgIAEHgIEEHAAGEnAAGEjAAWAgAQeAgQQcAAYScAAYSMABYCABB4CBBBwABhJwABhIwAFgIAEHgIEEHAAGEnAAGEjAAWAgAQeAgQQcAAYScAAYSMABYCABB4CBBBwABhJwABhIwAFgIAEHgIEEHAAGEnAAGEjAAWAgAQeAgQQcAAYScAAYSMABYCABB4CBBBwABhJwABhIwAFgIAEHgIH2HPCquqCqfreq7qiq26vqqiUHAwB2tm+Nj/1yktd0921V9egkt1bVzd39sYVmAwB2sOcj8O6+t7tvW13+YpI7kpy31GAAwM4WeQy8qi5M8rQktzzIfVdW1cGqOnj4yP1LbA4AvuatHfCqOjPJO5K8qru/cPT93X1Ndx/o7gPn7D973c0BAFkz4FX1ddmK9w3dfdMyIwEAu1nnWeiV5C1J7ujuNy03EgCwm3WOwJ+V5PuSPKeqPrR6e8FCcwEAx7DnXyPr7vcnqQVnAQCOk1diA4CBBBwABhJwABhIwAFgIAEHgIEEHAAGEnAAGEjAAWAgAQeAgQQcAAYScAAYSMABYCABB4CBBBwABhJwABhIwAFgIAEHgIEEHAAGEnAAGEjAAWAgAQeAgQQcAAYScAAYSMABYCABB4CBBBwABhJwABhIwAFgIAEHgIEEHAAGEnAAGEjAAWAgAQeAgQQcAAYScAAYSMABYCABB4CBBBwABhJwABhIwAFgIAEHgIEEHAAGEnAAGEjAAWAgAQeAgdYOeFWdVlUfrKpfX2IgAGB3SxyBX5XkjgXWAwAcp7UCXlXnJ/meJNcuMw4AcDzWPQL/2SSvTfLXOy1QVVdW1cGqOnj4yP1rbg4ASNYIeFW9MMl93X3rsZbr7mu6+0B3Hzhn/9l73RwAsM06R+DPSvKiqroryduTPKeqfmmRqQCAY9pzwLv7dd19fndfmOQlSX6nu1+22GQAwI78HjgADLRviZV09+8l+b0l1gUA7M4ROAAMJOAAMJCAA8BAAg4AAwk4AAwk4AAwkIADwEACDgADCTgADCTgADCQgAPAQAIOAAMJOAAMJOAAMJCAA8BAAg4AAwk4AAwk4AAwkIADwEACDgADCTgADCTgADCQgAPAQAIOAAMJOAAMJOAAMJCAA8BAAg4AAwk4AAwk4AAwkIADwEACDgADCTgADCTgADDQvk0PAKeyN1zy+BO3rdvuPWHbAjbPETgADCTgADCQgAPAQAIOAAMJOAAMJOAAMJCAA8BAAg4AAwk4AAwk4AAw0FoBr6rHVtWNVfXxqrqjqp651GAAwM7WfS30n0vy7u7+V1V1epIzFpgJANjFngNeVY9J8uwkL0+S7v5Ski8tMxYAcCzrnEK/KMnhJL9YVR+sqmur6lFHL1RVV1bVwao6ePjI/WtsDgB4wDoB35fkkiRv7u6nJfmLJFcfvVB3X9PdB7r7wDn7z15jcwDAA9YJ+KEkh7r7ltX1G7MVdADgYbbngHf3nya5p6qevLrp0iQfW2QqAOCY1n0W+iuS3LB6Bvqnknz/+iMBALtZK+Dd/aEkBxaaBQA4Tl6JDQAGEnAAGEjAAWAgAQeAgQQcAAYScAAYSMABYCABB4CB1n0lNuAY3nDbvZseAThFOQIHgIEEHAAGEnAAGEjAAWAg
AQeAgQQcAAYScAAYSMABYCABB4CBBBwABhJwABhIwAFgIAEHgIEEHAAGEnAAGEjAAWAgAQeAgQQcAAYScAAYSMABYCABB4CBBBwABhJwABhIwAFgIAEHgIEEHAAGEnAAGEjAAWAgAQeAgQQcAAYScAAYSMABYCABB4CBBBwABhJwABhIwAFgoLUCXlWvrqrbq+qjVfW2qnrEUoMBADvbc8Cr6rwkr0xyoLufmuS0JC9ZajAAYGfrnkLfl+SRVbUvyRlJPrP+SADAbvYc8O7+dJKfSXJ3knuT/Fl3v+fo5arqyqo6WFUHDx+5f++TAgB/Y51T6GcluTzJE5Ocm+RRVfWyo5fr7mu6+0B3Hzhn/9l7nxQA+BvrnEJ/bpI7u/twd/9VkpuSfOcyYwEAx7JOwO9O8oyqOqOqKsmlSe5YZiwA4FjWeQz8liQ3JrktyUdW67pmobkAgGPYt84Hd/dPJfmphWYBAI6TV2IDgIEEHAAGEnAAGEjAAWAgAQeAgQQcAAYScAAYSMABYCABB4CBBBwABhJwABhIwAFgIAEHgIEEHAAGEnAAGEjAAWAgAQeAgQQcAAYScAAYSMABYCABB4CBBBwABhJwABhIwAFgIAEHgIEEHAAGEnAAGEjAAWAgAQeAgQQcAAYScAAYSMABYCABB4CBBBwABhJwABhIwAFgIAEHgIEEHAAGEnAAGEjAAWAgAQeAgQQcAAYScAAYSMABYCABB4CBdg14Vb21qu6rqo9uu+3rq+rmqvrE6v1ZD++YAMB2x3MEfl2Sy4667eok7+3ui5O8d3UdADhBdg14d78vyeeOuvnyJNevLl+f5MULzwUAHMNeHwP/hu6+N0lW7x+304JVdWVVHayqg4eP3L/HzQEA2z3sT2Lr7mu6+0B3Hzhn/9kP9+YA4GvCXgP+2ap6fJKs3t+33EgAwG72GvB3JblidfmKJO9cZhwA4Hgcz6+RvS3JB5I8uaoOVdUPJnljkudV1SeSPG91HQA4QfbttkB3v3SHuy5deBYA4Dh5JTYAGEjAAWAgAQeAgQQcAAYScAAYSMABYCABB4CBBBwABqruPnEbqzqc5E8e4oftT3LkYRhn0+zXLPZrllN1v5JTd9/s19/6pu4+Z7eFTmjA96KqDnb3gU3PsTT7NYv9muVU3a/k1N03+/XQOYUOAAMJOAAMNCHg12x6gIeJ/ZrFfs1yqu5Xcurum/16iE76x8ABgK824QgcADjKSR3wqrqsqv6oqj5ZVVdvep4lVNUFVfW7VXVHVd1eVVdteqYlVdVpVfXBqvr1Tc+ylKp6bFXdWFUfX/2/PXPTMy2hql69+hz8aFW9raoesemZ9qKq3lpV91XVR7fd9vVVdXNVfWL1/qxNzrgXO+zXT68+D/+wqv57VT12kzPuxYPt17b7frSquqr2b2K2dey0X1X1ilXHbq+q/7zkNk/agFfVaUl+IcnzkzwlyUur6imbnWoRX07ymu7+h0mekeQ/nCL79YCrktyx6SEW9nNJ3t3d35Lk23IK7F9VnZfklUkOdPdTk5yW5CWbnWrPrkty2VG3XZ3kvd19cZL3rq5Pc12+er9uTvLU7v7WJH+c5HUneqgFXJev3q9U1QVJnpfk7hM90EKuy1H7VVX/LMnlSb61u/9Rkp9ZcoMnbcCTPD3JJ7v7U939pSRvz9Y/xGjdfW9337a6/MVsxeC8zU61jKo6P8n3JLl207Mspaoek+TZSd6SJN39pe7+/GanWsy+JI+sqn1JzkjymQ3Psyfd/b4knzvq5suTXL+6fH2SF5/QoRbwYPvV3e/p7i+vrv6fJOef8MHWtMP/V5L8lySvTTLyiVk77NcPJXljd/+/1TL3LbnNkzng5yW5Z9v1QzlFQveAqrowydOS3LLZSRbzs9n6AvzrTQ+yoIuSHE7yi6uHBq6tqkdteqh1dfens3U0cHeSe5P8WXe/Z7NTLeobuvveZOuH5iSP2/A8D4cfSPKbmx5iCVX1oiSf7u4P
b3qWhT0pyXdV1S1V9b+q6juWXPnJHPB6kNtG/mT2YKrqzCTvSPKq7v7CpudZV1W9MMl93X3rpmdZ2L4klyR5c3c/LclfZObp2L9j9Zjw5UmemOTcJI+qqpdtdiqOV1X9ZLYejrth07Osq6rOSPKTSV6/6VkeBvuSnJWth0t/LMmvVNWDtW1PTuaAH0pywbbr52foKb6jVdXXZSveN3T3TZueZyHPSvKiqrorWw93PKeqfmmzIy3iUJJD3f3AWZIbsxX06Z6b5M7uPtzdf5XkpiTfueGZlvTZqnp8kqzeL3rqcpOq6ookL0zy7/rU+D3gf5CtHyQ/vPr+cX6S26rqGzc61TIOJbmpt/x+ts5OLvYEvZM54H+Q5OKqemJVnZ6tJ9i8a8MzrW3109dbktzR3W/a9DxL6e7Xdff53X1htv6vfqe7xx/RdfefJrmnqp68uunSJB/b4EhLuTvJM6rqjNXn5KU5BZ6ct827klyxunxFknducJbFVNVlSX48yYu6+/9uep4ldPdHuvtx3X3h6vvHoSSXrL72pvu1JM9Jkqp6UpLTs+AfbDlpA756osaPJPmtbH1j+ZXuvn2zUy3iWUm+L1tHqB9avb1g00NxTK9IckNV/WGSb0/ynzY8z9pWZxRuTHJbko9k63vByFfCqqq3JflAkidX1aGq+sEkb0zyvKr6RLae2fzGTc64Fzvs188neXSSm1ffO/7rRofcgx32a7wd9uutSS5a/WrZ25NcseRZE6/EBgADnbRH4ADAzgQcAAYScAAYSMABYCABB4CBBBwABhJwABhIwAFgoP8PmFm83a4TWvMAAAAASUVORK5CYII=\n", "text/plain": [ - "'False: 0.17, True: 0.83'" + "" ] }, - "execution_count": 46, "metadata": {}, - "output_type": "execute_result" + "output_type": "display_data" } ], "source": [ - "gibbs_ask('Cloudy', dict(Rain=True), sprinkler, 200).show_approx()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Inference in Temporal Models" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Before we start, it will be helpful to understand the structure of a temporal model. We will use the example of the book with the guard and the umbrella. In this example, the state $\\textbf{X}$ is whether it is a rainy day (`X = True`) or not (`X = False`) at Day $\\textbf{t}$. In the sensor or observation model, the observation or evidence $\\textbf{U}$ is whether the professor holds an umbrella (`U = True`) or not (`U = False`) on **Day** $\\textbf{t}$. 
Based on that, the transition model is \n", - "\n", - "| $X_{t-1}$ | $X_{t}$ | **P**$(X_{t}| X_{t-1})$| \n", - "| ------------- |------------- | ----------------------------------|\n", - "| ***${False}$*** | ***${False}$*** | 0.7 |\n", - "| ***${False}$*** | ***${True}$*** | 0.3 |\n", - "| ***${True}$*** | ***${False}$*** | 0.3 |\n", - "| ***${True}$*** | ***${True}$*** | 0.7 |\n", - "\n", - "And the the sensor model will be,\n", - "\n", - "| $X_{t}$ | $U_{t}$ | **P**$(U_{t}|X_{t})$| \n", - "| :-------------: |:-------------: | :------------------------:|\n", - "| ***${False}$*** | ***${True}$*** | 0.2 |\n", - "| ***${False}$*** | ***${False}$*** | 0.8 |\n", - "| ***${True}$*** | ***${True}$*** | 0.9 |\n", - "| ***${True}$*** | ***${False}$*** | 0.1 |\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In the filtering task we are given evidence **U** in each time **t** and we want to compute the belief $B_{t}(x)= P(X_{t}|U_{1:t})$. \n", - "We can think of it as a three step process:\n", - "1. In every step we start with the current belief $P(X_{t}|e_{1:t})$\n", - "2. We update it for time\n", - "3. We update it for evidence\n", - "\n", - "The forward algorithm performs the step 2 and 3 at once. It updates, or better say reweights, the initial belief using the transition and the sensor model. Let's see the umbrella example. On **Day 0** no observation is available, and for that reason we will assume that we have equal possibilities to rain or not. In the **`HiddenMarkovModel`** class, the prior probabilities for **Day 0** are by default [0.5, 0.5]. 
" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "%psource HiddenMarkovModel" + "a = {'v': (0, 1), 'w': 0}\n", + "z = (2, 3, 5, 7)\n", + "S = monte_carlo_localization(a, z, 1000, P_motion_sample, P_sensor, m, S)\n", + "grid = [[0]*17 for _ in range(11)]\n", + "for x, y, _ in S:\n", + " if 0 <= x < 11 and 0 <= y < 17:\n", + " grid[x][y] += 1\n", + "print(\"GRID:\")\n", + "print_table(grid)\n", + "heatmap(grid, cmap='Oranges')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "We instantiate the object **`hmm`** of the class using a list of lists for both the transition and the sensor model." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "umbrella_transition_model = [[0.7, 0.3], [0.3, 0.7]]\n", - "umbrella_sensor_model = [[0.9, 0.2], [0.1, 0.8]]\n", - "hmm = HiddenMarkovModel(umbrella_transition_model, umbrella_sensor_model)" + "In this case, the robot is 99.9% certain that it is at position `(6, 7)`." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "The **`sensor_dist()`** method returns a list with the conditional probabilities of the sensor model." + "## INFORMATION GATHERING AGENT\n", + "We now move into the domain of probabilistic decision making.\n", + "Before we discuss what an information gathering agent is, we'll need to know what decision networks are.\n", + "For an agent in an environment, a decision network represents information about the agent's current state, its possible actions, the state that will result from the agent's action, and the utility of that state.\n", + "Decision networks have three primary kinds of nodes which are:\n", + "1. __Chance nodes__: These represent random variables, just like in Bayesian networks.\n", + "2. 
__Decision nodes__: These represent points where the decision-maker has a choice between different actions and tries to find the optimal decision at these nodes with regard to the cost, safety and resulting utility.\n", + "3. __Utility nodes__: These represent the agent's utility function.\n", + "A description of the agent's utility as a function is associated with a utility node.\n", + "
\n", + "
\n", + "To evaluate a decision network, we do the following:\n", + "1. Initialize the evidence variables according to the current state.\n", + "2. Calculate posterior probabilities for each possible value of the decision node and calculate the utility resulting from that action.\n", + "3. Return the action with the highest utility.\n", + "
\n", + "Let's have a look at the implementation of the `DecisionNetwork` class." ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 98, "metadata": {}, "outputs": [ { "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + " Codestin Search App\n", + " \n", + " \n", + "\n", + "\n", + "

\n", + "\n", + "
class DecisionNetwork(BayesNet):\n",
+       "    """An abstract class for a decision network as a wrapper for a BayesNet.\n",
+       "    Represents an agent's current state, its possible actions, reachable states\n",
+       "    and utilities of those states."""\n",
+       "\n",
+       "    def __init__(self, action, infer):\n",
+       "        """action: a single action node\n",
+       "        infer: the preferred method to carry out inference on the given BayesNet"""\n",
+       "        super(DecisionNetwork, self).__init__()\n",
+       "        self.action = action\n",
+       "        self.infer = infer\n",
+       "\n",
+       "    def best_action(self):\n",
+       "        """Return the best action in the network"""\n",
+       "        return self.action\n",
+       "\n",
+       "    def get_utility(self, action, state):\n",
+       "        """Return the utility for a particular action and state in the network"""\n",
+       "        raise NotImplementedError\n",
+       "\n",
+       "    def get_expected_utility(self, action, evidence):\n",
+       "        """Compute the expected utility given an action and evidence"""\n",
+       "        u = 0.0\n",
+       "        prob_dist = self.infer(action, evidence, self).prob\n",
+       "        for item, _ in prob_dist.items():\n",
+       "            u += prob_dist[item] * self.get_utility(action, item)\n",
+       "\n",
+       "        return u\n",
+       "
\n", + "\n", + "\n" + ], "text/plain": [ - "[0.9, 0.2]" + "" ] }, - "execution_count": 12, "metadata": {}, - "output_type": "execute_result" + "output_type": "display_data" } ], "source": [ - "hmm.sensor_dist(ev=True)" + "psource(DecisionNetwork)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "The observation update is calculated with the **`forward()`** function. Basically, we update our belief using the observation model. The function returns a list with the probabilities of **raining or not** on **Day 1**." - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "psource(forward)" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The probability of raining on day 1 is 0.82\n" - ] - } - ], - "source": [ - "umbrella_prior = [0.5, 0.5]\n", - "belief_day_1 = forward(hmm, umbrella_prior, ev=True)\n", - "print ('The probability of raining on day 1 is {:.2f}'.format(belief_day_1[0]))" + "The `DecisionNetwork` class inherits from `BayesNet` and has a few extra helper methods.\n", + "
\n", + "`best_action` returns the best action in the network.\n", + "
\n", + "`get_utility` is an abstract method which is supposed to return the utility of a particular action and state in the network.\n", + "
\n", + "`get_expected_utility` computes the expected utility, given an action and evidence.\n", + "
" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "In **Day 2** our initial belief is the updated belief of **Day 1**. Again using the **`forward()`** function we can compute the probability of raining in **Day 2**" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The probability of raining in day 2 is 0.88\n" - ] - } - ], - "source": [ - "belief_day_2 = forward(hmm, belief_day_1, ev=True)\n", - "print ('The probability of raining in day 2 is {:.2f}'.format(belief_day_2[0]))" + "Before we proceed, we need to know a few more terms.\n", + "
\n", + "Having __perfect information__ refers to a state of being fully aware of the current state, the cost functions and the outcomes of actions.\n", + "This in turn allows an agent to find the exact utility value of each state.\n", + "If an agent has perfect information about the environment, maximum expected utility calculations are exact and can be computed with absolute certainty.\n", + "
\n", + "In decision theory, the __value of perfect information__ (VPI) is the price that an agent would be willing to pay in order to gain access to _perfect information_.\n", + "VPI calculations are extensively used to calculate expected utilities for nodes in a decision network.\n", + "
\n", + "For a random variable $E_j$ whose value is currently unknown, the value of discovering $E_j$, given current information $e$, must average over all possible values $e_{jk}$ that we might discover for $E_j$, using our _current_ beliefs about its value.\n", + "The VPI of $E_j$ is then given by:\n", + "
\n", + "
\n", + "$$VPI_e(E_j) = \\left(\\sum_{k}P(E_j=e_{jk}\\ |\\ e) EU(\\alpha_{e_{jk}}\\ |\\ e, E_j=e_{jk})\\right) - EU(\\alpha\\ |\\ e)$$\n", + "
\n", + "VPI is _non-negative_, _non-additive_ and _order-independent_." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "In the smoothing part we are interested in computing the distribution over past states given evidence up to the present. Assume that we want to compute the distribution for the time **k**, for $0\\leq k\n", + "As an overview, an information gathering agent works by repeatedly selecting the observations with the highest information value, until the cost of the next observation is greater than its expected benefit.\n", + "
\n", + "The `InformationGatheringAgent` class is an abstract class that inherits from `Agent` and works on the principles discussed above.\n", + "Let's have a look.\n", + "
" ] }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 99, "metadata": {}, "outputs": [ { "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + " Codestin Search App\n", + " \n", + " \n", + "\n", + "\n", + "

\n", + "\n", + "
class InformationGatheringAgent(Agent):\n",
+       "    """A simple information gathering agent. The agent works by repeatedly selecting\n",
+       "    the observation with the highest information value, until the cost of the next\n",
+       "    observation is greater than its expected benefit. [Figure 16.9]"""\n",
+       "\n",
+       "    def __init__(self, decnet, infer, initial_evidence=None):\n",
+       "        """decnet: a decision network\n",
+       "        infer: the preferred method to carry out inference on the given decision network\n",
+       "        initial_evidence: initial evidence"""\n",
+       "        self.decnet = decnet\n",
+       "        self.infer = infer\n",
+       "        self.observation = initial_evidence or []\n",
+       "        self.variables = self.decnet.nodes\n",
+       "\n",
+       "    def integrate_percept(self, percept):\n",
+       "        """Integrate the given percept into the decision network"""\n",
+       "        raise NotImplementedError\n",
+       "\n",
+       "    def execute(self, percept):\n",
+       "        """Execute the information gathering algorithm"""\n",
+       "        self.observation = self.integrate_percept(percept)\n",
+       "        vpis = self.vpi_cost_ratio(self.variables)\n",
+       "        j = argmax(vpis)\n",
+       "        variable = self.variables[j]\n",
+       "\n",
+       "        if self.vpi(variable) > self.cost(variable):\n",
+       "            return self.request(variable)\n",
+       "\n",
+       "        return self.decnet.best_action()\n",
+       "\n",
+       "    def request(self, variable):\n",
+       "        """Return the value of the given random variable as the next percept"""\n",
+       "        raise NotImplementedError\n",
+       "\n",
+       "    def cost(self, var):\n",
+       "        """Return the cost of obtaining evidence through tests, consultants or questions"""\n",
+       "        raise NotImplementedError\n",
+       "\n",
+       "    def vpi_cost_ratio(self, variables):\n",
+       "        """Return the VPI to cost ratio for the given variables"""\n",
+       "        v_by_c = []\n",
+       "        for var in variables:\n",
+       "            v_by_c.append(self.vpi(var) / self.cost(var))\n",
+       "        return v_by_c\n",
+       "\n",
+       "    def vpi(self, variable):\n",
+       "        """Return VPI for a given variable"""\n",
+       "        vpi = 0.0\n",
+       "        prob_dist = self.infer(variable, self.observation, self.decnet).prob\n",
+       "        for item, _ in prob_dist.items():\n",
+       "            post_prob = prob_dist[item]\n",
+       "            new_observation = list(self.observation)\n",
+       "            new_observation.append(item)\n",
+       "            expected_utility = self.decnet.get_expected_utility(variable, new_observation)\n",
+       "            vpi += post_prob * expected_utility\n",
+       "\n",
+       "        vpi -= self.decnet.get_expected_utility(variable, self.observation)\n",
+       "        return vpi\n",
+       "
\n", + "\n", + "\n" + ], "text/plain": [ - "[0.6272727272727272, 0.37272727272727274]" + "" ] }, - "execution_count": 23, "metadata": {}, - "output_type": "execute_result" + "output_type": "display_data" } ], "source": [ - "b = [1, 1]\n", - "backward(hmm, b, ev=True)" + "psource(InformationGatheringAgent)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Some may notice that the result is not the same as in the book. The main reason is that in the book the normalization step is not used. If we want to normalize the result, one can use the **`normalize()`** helper function.\n", - "\n", - "In order to find the smoothed estimate for raining in **Day k**, we will use the **`forward_backward()`** function. As in the example in the book, the umbrella is observed in both days and the prior distribution is [0.5, 0.5]" - ] - }, - { - "cell_type": "code", - "execution_count": 50, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "pseudocode('Forward-Backward')" + "The `cost` method is an abstract method that returns the cost of obtaining the evidence through tests, consultants, questions or any other means.\n", + "
\n", + "The `request` method returns the value of the given random variable as the next percept.\n", + "
\n", + "The `vpi_cost_ratio` method returns a list of VPI divided by cost for each variable in the `variables` list provided to it.\n", + "
\n", + "The `vpi` method calculates the VPI for a given variable.\n", + "
\n", + "And finally, the `execute` method executes the general information gathering algorithm, as described in __figure 16.9__ in the book.\n", + "
\n", + "Our agent implements a form of information gathering that is called __myopic__ as the VPI formula is used shortsightedly here.\n", + "It calculates the value of information as if only a single evidence variable will be acquired.\n", + "This is similar to greedy search, where we do not look at the bigger picture and aim for local optimizations to hopefully reach the global optimum.\n", + "This often works well in practice but a myopic agent might hastily take an action when it would have been better to request more variables before taking an action.\n", + "A _conditional plan_, on the other hand, might work better for some scenarios.\n", + "
\n" ] }, { - "cell_type": "code", - "execution_count": 24, + "cell_type": "markdown", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The probability of raining in Day 0 is 0.65 and in Day 1 is 0.88\n" - ] - } - ], "source": [ - "umbrella_prior = [0.5, 0.5]\n", - "prob = forward_backward(hmm, ev=[T, T], prior=umbrella_prior)\n", - "print ('The probability of raining in Day 0 is {:.2f} and in Day 1 is {:.2f}'.format(prob[0][0], prob[1][0]))" + "With this we conclude this notebook." ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] } ], "metadata": { @@ -1862,7 +6372,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.1" + "version": "3.6.4" } }, "nbformat": 4, diff --git a/probability.py b/probability.py index 205ae426e..458273b92 100644 --- a/probability.py +++ b/probability.py @@ -7,6 +7,7 @@ weighted_sample_with_replacement, isclose, probability, normalize ) from logic import extend +from agents import Agent import random from collections import defaultdict @@ -201,6 +202,96 @@ def __repr__(self): return 'BayesNet({0!r})'.format(self.nodes) +class DecisionNetwork(BayesNet): + """An abstract class for a decision network as a wrapper for a BayesNet. 
+ Represents an agent's current state, its possible actions, reachable states + and utilities of those states.""" + + def __init__(self, action, infer): + """action: a single action node + infer: the preferred method to carry out inference on the given BayesNet""" + super(DecisionNetwork, self).__init__() + self.action = action + self.infer = infer + + def best_action(self): + """Return the best action in the network""" + return self.action + + def get_utility(self, action, state): + """Return the utility for a particular action and state in the network""" + raise NotImplementedError + + def get_expected_utility(self, action, evidence): + """Compute the expected utility given an action and evidence""" + u = 0.0 + prob_dist = self.infer(action, evidence, self).prob + for item, _ in prob_dist.items(): + u += prob_dist[item] * self.get_utility(action, item) + + return u + + +class InformationGatheringAgent(Agent): + """A simple information gathering agent. The agent works by repeatedly selecting + the observation with the highest information value, until the cost of the next + observation is greater than its expected benefit. 
[Figure 16.9]""" + + def __init__(self, decnet, infer, initial_evidence=None): + """decnet: a decision network + infer: the preferred method to carry out inference on the given decision network + initial_evidence: initial evidence""" + self.decnet = decnet + self.infer = infer + self.observation = initial_evidence or [] + self.variables = self.decnet.nodes + + def integrate_percept(self, percept): + """Integrate the given percept into the decision network""" + raise NotImplementedError + + def execute(self, percept): + """Execute the information gathering algorithm""" + self.observation = self.integrate_percept(percept) + vpis = self.vpi_cost_ratio(self.variables) + j = argmax(vpis) + variable = self.variables[j] + + if self.vpi(variable) > self.cost(variable): + return self.request(variable) + + return self.decnet.best_action() + + def request(self, variable): + """Return the value of the given random variable as the next percept""" + raise NotImplementedError + + def cost(self, var): + """Return the cost of obtaining evidence through tests, consultants or questions""" + raise NotImplementedError + + def vpi_cost_ratio(self, variables): + """Return the VPI to cost ratio for the given variables""" + v_by_c = [] + for var in variables: + v_by_c.append(self.vpi(var) / self.cost(var)) + return v_by_c + + def vpi(self, variable): + """Return VPI for a given variable""" + vpi = 0.0 + prob_dist = self.infer(variable, self.observation, self.decnet).prob + for item, _ in prob_dist.items(): + post_prob = prob_dist[item] + new_observation = list(self.observation) + new_observation.append(item) + expected_utility = self.decnet.get_expected_utility(variable, new_observation) + vpi += post_prob * expected_utility + + vpi -= self.decnet.get_expected_utility(variable, self.observation) + return vpi + + class BayesNode: """A conditional probability distribution for a boolean variable, P(X | parents). 
Part of a BayesNet.""" @@ -433,7 +524,7 @@ def prior_sample(bn): # _________________________________________________________________________ -def rejection_sampling(X, e, bn, N): +def rejection_sampling(X, e, bn, N=10000): """Estimate the probability distribution of variable X given evidence e in BayesNet bn, using N samples. [Figure 14.14] Raises a ZeroDivisionError if all the N samples are rejected, @@ -459,7 +550,7 @@ def consistent_with(event, evidence): # _________________________________________________________________________ -def likelihood_weighting(X, e, bn, N): +def likelihood_weighting(X, e, bn, N=10000): """Estimate the probability distribution of variable X given evidence e in BayesNet bn. [Figure 14.15] >>> random.seed(1017) @@ -491,7 +582,7 @@ def weighted_sample(bn, e): # _________________________________________________________________________ -def gibbs_ask(X, e, bn, N): +def gibbs_ask(X, e, bn, N=1000): """[Figure 14.16]""" assert X not in e, "Query variable must be distinct from evidence" counts = {x: 0 for x in bn.variable_values(X)} # bold N in [Figure 14.16] diff --git a/tests/test_mdp.py b/tests/test_mdp.py index 00710bc9f..5552f7570 100644 --- a/tests/test_mdp.py +++ b/tests/test_mdp.py @@ -119,3 +119,43 @@ def test_transition_model(): assert mdp.T("a","plan3") == [(0.2, 'a'), (0.5, 'b'), (0.3, 'c')] assert mdp.T("b","plan2") == [(0.6, 'a'), (0.2, 'b'), (0.1, 'c'), (0.1, 'd')] assert mdp.T("c","plan1") == [(0.3, 'a'), (0.5, 'b'), (0.1, 'c'), (0.1, 'd')] + + +def test_pomdp_value_iteration(): + t_prob = [[[0.65, 0.35], [0.65, 0.35]], [[0.65, 0.35], [0.65, 0.35]], [[1.0, 0.0], [0.0, 1.0]]] + e_prob = [[[0.5, 0.5], [0.5, 0.5]], [[0.5, 0.5], [0.5, 0.5]], [[0.8, 0.2], [0.3, 0.7]]] + rewards = [[5, -10], [-20, 5], [-1, -1]] + + gamma = 0.95 + actions = ('0', '1', '2') + states = ('0', '1') + + pomdp = POMDP(actions, t_prob, e_prob, rewards, states, gamma) + utility = pomdp_value_iteration(pomdp, epsilon=5) + + for _, v in utility.items(): + sum_ 
= 0 + for element in v: + sum_ += sum(element) + # exact value was found to be -9.73231 + assert -9.76 < sum_ < -9.70 + + +def test_pomdp_value_iteration2(): + t_prob = [[[0.5, 0.5], [0.5, 0.5]], [[0.5, 0.5], [0.5, 0.5]], [[1.0, 0.0], [0.0, 1.0]]] + e_prob = [[[0.5, 0.5], [0.5, 0.5]], [[0.5, 0.5], [0.5, 0.5]], [[0.85, 0.15], [0.15, 0.85]]] + rewards = [[-100, 10], [10, -100], [-1, -1]] + + gamma = 0.95 + actions = ('0', '1', '2') + states = ('0', '1') + + pomdp = POMDP(actions, t_prob, e_prob, rewards, states, gamma) + utility = pomdp_value_iteration(pomdp, epsilon=100) + + for _, v in utility.items(): + sum_ = 0 + for element in v: + sum_ += sum(element) + # exact value was found to be -77.28259 + assert -77.31 < sum_ < -77.25 diff --git a/tests/test_planning.py b/tests/test_planning.py index 641a2eeca..5b6943ee3 100644 --- a/tests/test_planning.py +++ b/tests/test_planning.py @@ -117,8 +117,8 @@ def test_shopping_problem(): def test_graph_call(): - pddl = spare_tire() - graph = Graph(pddl) + planningproblem = spare_tire() + graph = Graph(planningproblem) levels_size = len(graph.levels) graph() @@ -162,11 +162,11 @@ def test_graphplan(): assert expr('Buy(Milk, SM)') in shopping_problem_solution -def test_total_order_planner(): +def test_linearize_class(): st = spare_tire() possible_solutions = [[expr('Remove(Spare, Trunk)'), expr('Remove(Flat, Axle)'), expr('PutOn(Spare, Axle)')], [expr('Remove(Flat, Axle)'), expr('Remove(Spare, Trunk)'), expr('PutOn(Spare, Axle)')]] - assert TotalOrderPlanner(st).execute() in possible_solutions + assert Linearize(st).execute() in possible_solutions ac = air_cargo() possible_solutions = [[expr('Load(C1, P1, SFO)'), expr('Load(C2, P2, JFK)'), expr('Fly(P1, SFO, JFK)'), expr('Fly(P2, JFK, SFO)'), expr('Unload(C1, P1, JFK)'), expr('Unload(C2, P2, SFO)')], @@ -182,7 +182,7 @@ def test_total_order_planner(): [expr('Load(C2, P2, JFK)'), expr('Fly(P2, JFK, SFO)'), expr('Load(C1, P1, SFO)'), expr('Fly(P1, SFO, JFK)'), expr('Unload(C1, P1, 
JFK)'), expr('Unload(C2, P2, SFO)')], [expr('Load(C2, P2, JFK)'), expr('Fly(P2, JFK, SFO)'), expr('Load(C1, P1, SFO)'), expr('Fly(P1, SFO, JFK)'), expr('Unload(C2, P2, SFO)'), expr('Unload(C1, P1, JFK)')] ] - assert TotalOrderPlanner(ac).execute() in possible_solutions + assert Linearize(ac).execute() in possible_solutions ss = socks_and_shoes() possible_solutions = [[expr('LeftSock'), expr('RightSock'), expr('LeftShoe'), expr('RightShoe')], @@ -192,21 +192,76 @@ def test_total_order_planner(): [expr('LeftSock'), expr('LeftShoe'), expr('RightSock'), expr('RightShoe')], [expr('RightSock'), expr('RightShoe'), expr('LeftSock'), expr('LeftShoe')] ] - assert TotalOrderPlanner(ss).execute() in possible_solutions + assert Linearize(ss).execute() in possible_solutions -# def test_double_tennis(): -# p = double_tennis_problem -# assert p.goal_test() is False +def test_expand_actions(): + assert len(PartialOrderPlanner(spare_tire()).expand_actions()) == 16 + assert len(PartialOrderPlanner(air_cargo()).expand_actions()) == 360 + assert len(PartialOrderPlanner(have_cake_and_eat_cake_too()).expand_actions()) == 2 + assert len(PartialOrderPlanner(socks_and_shoes()).expand_actions()) == 4 + assert len(PartialOrderPlanner(simple_blocks_world()).expand_actions()) == 12 + assert len(PartialOrderPlanner(three_block_tower()).expand_actions()) == 36 -# solution = [expr("Go(A, RightBaseLine, LeftBaseLine)"), -# expr("Hit(A, Ball, RightBaseLine)"), -# expr("Go(A, LeftNet, RightBaseLine)")] -# for action in solution: -# p.act(action) +def test_find_open_precondition(): + st = spare_tire() + pop = PartialOrderPlanner(st) + assert pop.find_open_precondition()[0] == expr('At(Spare, Axle)') + assert pop.find_open_precondition()[1] == pop.finish + assert pop.find_open_precondition()[2][0].name == 'PutOn' + + ss = socks_and_shoes() + pop = PartialOrderPlanner(ss) + assert (pop.find_open_precondition()[0] == expr('LeftShoeOn') and pop.find_open_precondition()[2][0].name == 'LeftShoe') or 
(pop.find_open_precondition()[0] == expr('RightShoeOn') and pop.find_open_precondition()[2][0].name == 'RightShoe') + assert pop.find_open_precondition()[1] == pop.finish + + cp = have_cake_and_eat_cake_too() + pop = PartialOrderPlanner(cp) + assert pop.find_open_precondition()[0] == expr('Eaten(Cake)') + assert pop.find_open_precondition()[1] == pop.finish + assert pop.find_open_precondition()[2][0].name == 'Eat' + + +def test_cyclic(): + st = spare_tire() + pop = PartialOrderPlanner(st) + graph = [('a', 'b'), ('a', 'c'), ('b', 'c'), ('b', 'd'), ('d', 'e'), ('e', 'c')] + assert not pop.cyclic(graph) + + graph = [('a', 'b'), ('a', 'c'), ('b', 'c'), ('b', 'd'), ('d', 'e'), ('e', 'c'), ('e', 'b')] + assert pop.cyclic(graph) + + graph = [('a', 'b'), ('a', 'c'), ('b', 'c'), ('b', 'd'), ('d', 'e'), ('e', 'c'), ('b', 'e'), ('a', 'e')] + assert not pop.cyclic(graph) + + graph = [('a', 'b'), ('a', 'c'), ('b', 'c'), ('b', 'd'), ('d', 'e'), ('e', 'c'), ('e', 'b'), ('b', 'e'), ('a', 'e')] + assert pop.cyclic(graph) + + +def test_partial_order_planner(): + ss = socks_and_shoes() + pop = PartialOrderPlanner(ss) + constraints, causal_links = pop.execute(display=False) + plan = list(reversed(list(pop.toposort(pop.convert(pop.constraints))))) + assert list(plan[0])[0].name == 'Start' + assert (list(plan[1])[0].name == 'LeftSock' and list(plan[1])[1].name == 'RightSock') or (list(plan[1])[0].name == 'RightSock' and list(plan[1])[1].name == 'LeftSock') + assert (list(plan[2])[0].name == 'LeftShoe' and list(plan[2])[1].name == 'RightShoe') or (list(plan[2])[0].name == 'RightShoe' and list(plan[2])[1].name == 'LeftShoe') + assert list(plan[3])[0].name == 'Finish' + + +def test_double_tennis(): + p = double_tennis_problem() + assert not goal_test(p.goals, p.init) + + solution = [expr("Go(A, RightBaseLine, LeftBaseLine)"), + expr("Hit(A, Ball, RightBaseLine)"), + expr("Go(A, LeftNet, RightBaseLine)")] + + for action in solution: + p.act(action) -# assert p.goal_test() + assert 
goal_test(p.goals, p.init) def test_job_shop_problem(): diff --git a/tests/test_probability.py b/tests/test_probability.py index a40ef9728..b4d720937 100644 --- a/tests/test_probability.py +++ b/tests/test_probability.py @@ -30,12 +30,25 @@ def test_probdist_basic(): P = ProbDist('Flip') P['H'], P['T'] = 0.25, 0.75 assert P['H'] == 0.25 + assert P['T'] == 0.75 + assert P['X'] == 0.00 + + P = ProbDist('BiasedDie') + P['1'], P['2'], P['3'], P['4'], P['5'], P['6'] = 10, 15, 25, 30, 40, 80 + P.normalize() + assert P['2'] == 0.075 + assert P['4'] == 0.15 + assert P['6'] == 0.4 def test_probdist_frequency(): P = ProbDist('X', {'lo': 125, 'med': 375, 'hi': 500}) assert (P['lo'], P['med'], P['hi']) == (0.125, 0.375, 0.5) + P = ProbDist('Pascal-5', {'x1': 1, 'x2': 5, 'x3': 10, 'x4': 10, 'x5': 5, 'x6': 1}) + assert (P['x1'], P['x2'], P['x3'], P['x4'], P['x5'], P['x6']) == ( + 0.03125, 0.15625, 0.3125, 0.3125, 0.15625, 0.03125) + def test_probdist_normalize(): P = ProbDist('Flip') @@ -43,6 +56,12 @@ def test_probdist_normalize(): P = P.normalize() assert (P.prob['H'], P.prob['T']) == (0.350, 0.650) + P = ProbDist('BiasedDie') + P['1'], P['2'], P['3'], P['4'], P['5'], P['6'] = 10, 15, 25, 30, 40, 80 + P = P.normalize() + assert (P.prob['1'], P.prob['2'], P.prob['3'], P.prob['4'], P.prob['5'], P.prob['6']) == ( + 0.05, 0.075, 0.125, 0.15, 0.2, 0.4) + def test_jointprob(): P = JointProbDist(['X', 'Y']) @@ -66,6 +85,20 @@ def test_enumerate_joint(): assert enumerate_joint(['X'], dict(Y=2), P) == 0 assert enumerate_joint(['X'], dict(Y=1), P) == 0.75 + Q = JointProbDist(['W', 'X', 'Y', 'Z']) + Q[0, 1, 1, 0] = 0.12 + Q[1, 0, 1, 1] = 0.4 + Q[0, 0, 1, 1] = 0.5 + Q[0, 0, 1, 0] = 0.05 + Q[0, 0, 0, 0] = 0.675 + Q[1, 1, 1, 0] = 0.3 + assert enumerate_joint(['W'], dict(X=0, Y=0, Z=1), Q) == 0 + assert enumerate_joint(['W'], dict(X=0, Y=0, Z=0), Q) == 0.675 + assert enumerate_joint(['W'], dict(X=0, Y=1, Z=1), Q) == 0.9 + assert enumerate_joint(['Y'], dict(W=1, X=0, Z=1), Q) == 0.4 + assert 
enumerate_joint(['Z'], dict(W=0, X=0, Y=0), Q) == 0.675 + assert enumerate_joint(['Z'], dict(W=1, X=1, Y=1), Q) == 0.3 + def test_enumerate_joint_ask(): P = JointProbDist(['X', 'Y']) @@ -78,6 +111,7 @@ def test_enumerate_joint_ask(): def test_bayesnode_p(): bn = BayesNode('X', 'Burglary', {T: 0.2, F: 0.625}) + assert bn.p(True, {'Burglary': True, 'Earthquake': False}) == 0.2 assert bn.p(False, {'Burglary': False, 'Earthquake': True}) == 0.375 assert BayesNode('W', '', 0.75).p(False, {'Random': True}) == 0.25 @@ -94,19 +128,100 @@ def test_enumeration_ask(): assert enumeration_ask( 'Burglary', dict(JohnCalls=T, MaryCalls=T), burglary).show_approx() == 'False: 0.716, True: 0.284' + assert enumeration_ask( + 'Burglary', dict(JohnCalls=T, MaryCalls=F), + burglary).show_approx() == 'False: 0.995, True: 0.00513' + assert enumeration_ask( + 'Burglary', dict(JohnCalls=F, MaryCalls=T), + burglary).show_approx() == 'False: 0.993, True: 0.00688' + assert enumeration_ask( + 'Burglary', dict(JohnCalls=T), + burglary).show_approx() == 'False: 0.984, True: 0.0163' + assert enumeration_ask( + 'Burglary', dict(MaryCalls=T), + burglary).show_approx() == 'False: 0.944, True: 0.0561' def test_elemination_ask(): - elimination_ask( + assert elimination_ask( 'Burglary', dict(JohnCalls=T, MaryCalls=T), burglary).show_approx() == 'False: 0.716, True: 0.284' + assert elimination_ask( + 'Burglary', dict(JohnCalls=T, MaryCalls=F), + burglary).show_approx() == 'False: 0.995, True: 0.00513' + assert elimination_ask( + 'Burglary', dict(JohnCalls=F, MaryCalls=T), + burglary).show_approx() == 'False: 0.993, True: 0.00688' + assert elimination_ask( + 'Burglary', dict(JohnCalls=T), + burglary).show_approx() == 'False: 0.984, True: 0.0163' + assert elimination_ask( + 'Burglary', dict(MaryCalls=T), + burglary).show_approx() == 'False: 0.944, True: 0.0561' + + +def test_prior_sample(): + random.seed(42) + all_obs = [prior_sample(burglary) for x in range(1000)] + john_calls_true = [observation for 
observation in all_obs if observation['JohnCalls'] == True] + mary_calls_true = [observation for observation in all_obs if observation['MaryCalls'] == True] + burglary_and_john = [observation for observation in john_calls_true if observation['Burglary'] == True] + burglary_and_mary = [observation for observation in mary_calls_true if observation['Burglary'] == True] + assert len(john_calls_true) / 1000 == 46 / 1000 + assert len(mary_calls_true) / 1000 == 13 / 1000 + assert len(burglary_and_john) / len(john_calls_true) == 1 / 46 + assert len(burglary_and_mary) / len(mary_calls_true) == 1 / 13 + + +def test_prior_sample2(): + random.seed(128) + all_obs = [prior_sample(sprinkler) for x in range(1000)] + rain_true = [observation for observation in all_obs if observation['Rain'] == True] + sprinkler_true = [observation for observation in all_obs if observation['Sprinkler'] == True] + rain_and_cloudy = [observation for observation in rain_true if observation['Cloudy'] == True] + sprinkler_and_cloudy = [observation for observation in sprinkler_true if observation['Cloudy'] == True] + assert len(rain_true) / 1000 == 0.476 + assert len(sprinkler_true) / 1000 == 0.291 + assert len(rain_and_cloudy) / len(rain_true) == 376 / 476 + assert len(sprinkler_and_cloudy) / len(sprinkler_true) == 39 / 291 def test_rejection_sampling(): random.seed(47) - rejection_sampling( + assert rejection_sampling( 'Burglary', dict(JohnCalls=T, MaryCalls=T), burglary, 10000).show_approx() == 'False: 0.7, True: 0.3' + assert rejection_sampling( + 'Burglary', dict(JohnCalls=T, MaryCalls=F), + burglary, 10000).show_approx() == 'False: 1, True: 0' + assert rejection_sampling( + 'Burglary', dict(JohnCalls=F, MaryCalls=T), + burglary, 10000).show_approx() == 'False: 0.987, True: 0.0128' + assert rejection_sampling( + 'Burglary', dict(JohnCalls=T), + burglary, 10000).show_approx() == 'False: 0.982, True: 0.0183' + assert rejection_sampling( + 'Burglary', dict(MaryCalls=T), + burglary, 10000).show_approx() 
== 'False: 0.965, True: 0.0348' + + +def test_rejection_sampling2(): + random.seed(42) + assert rejection_sampling( + 'Cloudy', dict(Rain=T, Sprinkler=T), + sprinkler, 10000).show_approx() == 'False: 0.56, True: 0.44' + assert rejection_sampling( + 'Cloudy', dict(Rain=T, Sprinkler=F), + sprinkler, 10000).show_approx() == 'False: 0.119, True: 0.881' + assert rejection_sampling( + 'Cloudy', dict(Rain=F, Sprinkler=T), + sprinkler, 10000).show_approx() == 'False: 0.951, True: 0.049' + assert rejection_sampling( + 'Cloudy', dict(Rain=T), + sprinkler, 10000).show_approx() == 'False: 0.205, True: 0.795' + assert rejection_sampling( + 'Cloudy', dict(Sprinkler=T), + sprinkler, 10000).show_approx() == 'False: 0.835, True: 0.165' def test_likelihood_weighting(): @@ -114,6 +229,40 @@ def test_likelihood_weighting(): assert likelihood_weighting( 'Burglary', dict(JohnCalls=T, MaryCalls=T), burglary, 10000).show_approx() == 'False: 0.702, True: 0.298' + assert likelihood_weighting( + 'Burglary', dict(JohnCalls=T, MaryCalls=F), + burglary, 10000).show_approx() == 'False: 0.993, True: 0.00656' + assert likelihood_weighting( + 'Burglary', dict(JohnCalls=F, MaryCalls=T), + burglary, 10000).show_approx() == 'False: 0.996, True: 0.00363' + assert likelihood_weighting( + 'Burglary', dict(JohnCalls=F, MaryCalls=F), + burglary, 10000).show_approx() == 'False: 1, True: 0.000126' + assert likelihood_weighting( + 'Burglary', dict(JohnCalls=T), + burglary, 10000).show_approx() == 'False: 0.979, True: 0.0205' + assert likelihood_weighting( + 'Burglary', dict(MaryCalls=T), + burglary, 10000).show_approx() == 'False: 0.94, True: 0.0601' + + +def test_likelihood_weighting2(): + random.seed(42) + assert likelihood_weighting( + 'Cloudy', dict(Rain=T, Sprinkler=T), + sprinkler, 10000).show_approx() == 'False: 0.559, True: 0.441' + assert likelihood_weighting( + 'Cloudy', dict(Rain=T, Sprinkler=F), + sprinkler, 10000).show_approx() == 'False: 0.12, True: 0.88' + assert likelihood_weighting( + 
'Cloudy', dict(Rain=F, Sprinkler=T), + sprinkler, 10000).show_approx() == 'False: 0.951, True: 0.0486' + assert likelihood_weighting( + 'Cloudy', dict(Rain=T), + sprinkler, 10000).show_approx() == 'False: 0.198, True: 0.802' + assert likelihood_weighting( + 'Cloudy', dict(Sprinkler=T), + sprinkler, 10000).show_approx() == 'False: 0.833, True: 0.167' def test_forward_backward():