|
29 | 29 | "## CONTENTS\n",
|
30 | 30 | "\n",
|
31 | 31 | "* Overview\n",
|
32 |
| - "* Current-Best Learning" |
| 32 | + "* Current-Best Learning\n", |
| 33 | + "* Version-Space Learning" |
33 | 34 | ]
|
34 | 35 | },
|
35 | 36 | {
|
|
267 | 268 | "name": "stdout",
|
268 | 269 | "output_type": "stream",
|
269 | 270 | "text": [
|
270 |
| - "[{'Species': 'Cat', 'Rain': '!No'}, {'Coat': 'Yes', 'Species': 'Dog', 'Rain': 'Yes'}, {'Coat': 'Yes', 'Species': 'Cat'}]\n" |
| 271 | + "[{'Species': 'Cat', 'Rain': '!No'}, {'Coat': 'Yes', 'Rain': 'Yes'}, {'Coat': 'Yes'}]\n" |
271 | 272 | ]
|
272 | 273 | }
|
273 | 274 | ],
|
|
304 | 305 | ""
|
305 | 306 | ]
|
306 | 307 | },
|
| 308 | + { |
| 309 | + "cell_type": "markdown", |
| 310 | + "metadata": {}, |
| 311 | + "source": [ |
| 312 | + "With the function `r_example` we will build the dictionary examples:" |
| 313 | + ] |
| 314 | + }, |
| 315 | + { |
| 316 | + "cell_type": "code", |
| 317 | + "execution_count": 6, |
| 318 | + "metadata": { |
| 319 | + "collapsed": true |
| 320 | + }, |
| 321 | + "outputs": [], |
| 322 | + "source": [ |
| 323 | + "def r_example(Alt, Bar, Fri, Hun, Pat, Price, Rain, Res, Type, Est, GOAL):\n", |
| 324 | + " return {'Alt': Alt, 'Bar': Bar, 'Fri': Fri, 'Hun': Hun, 'Pat': Pat,\n", |
| 325 | + " 'Price': Price, 'Rain': Rain, 'Res': Res, 'Type': Type, 'Est': Est,\n", |
| 326 | + " 'GOAL': GOAL}" |
| 327 | + ] |
| 328 | + }, |
307 | 329 | {
|
308 | 330 | "cell_type": "markdown",
|
309 | 331 | "metadata": {
|
|
315 | 337 | },
|
316 | 338 | {
|
317 | 339 | "cell_type": "code",
|
318 |
| - "execution_count": 6, |
| 340 | + "execution_count": 7, |
319 | 341 | "metadata": {
|
320 | 342 | "collapsed": true
|
321 | 343 | },
|
322 | 344 | "outputs": [],
|
323 | 345 | "source": [
|
324 | 346 | "restaurant = [\n",
|
325 |
| - " {'Alt': 'Yes', 'Bar': 'No', 'Fri': 'No', 'Hun': 'Yes', 'Pat': 'Some',\n", |
326 |
| - " 'Price': '$$$', 'Rain': 'No', 'Res': 'Yes', 'Type': 'French', 'Est': '0-10',\n", |
327 |
| - " 'GOAL': True},\n", |
328 |
| - "\n", |
329 |
| - " {'Alt': 'Yes', 'Bar': 'No', 'Fri': 'No', 'Hun': 'Yes', 'Pat': 'Full',\n", |
330 |
| - " 'Price': '$', 'Rain': 'No', 'Res': 'No', 'Type': 'Thai', 'Est': '30-60',\n", |
331 |
| - " 'GOAL': False},\n", |
332 |
| - "\n", |
333 |
| - " {'Alt': 'No', 'Bar': 'Yes', 'Fri': 'No', 'Hun': 'No', 'Pat': 'Some',\n", |
334 |
| - " 'Price': '$', 'Rain': 'No', 'Res': 'No', 'Type': 'Burger', 'Est': '0-10',\n", |
335 |
| - " 'GOAL': True},\n", |
336 |
| - "\n", |
337 |
| - " {'Alt': 'Yes', 'Bar': 'No', 'Fri': 'Yes', 'Hun': 'Yes', 'Pat': 'Full',\n", |
338 |
| - " 'Price': '$', 'Rain': 'Yes', 'Res': 'No', 'Type': 'Thai', 'Est': '10-30',\n", |
339 |
| - " 'GOAL': True},\n", |
340 |
| - "\n", |
341 |
| - " {'Alt': 'Yes', 'Bar': 'No', 'Fri': 'Yes', 'Hun': 'No', 'Pat': 'Full',\n", |
342 |
| - " 'Price': '$$$', 'Rain': 'No', 'Res': 'Yes', 'Type': 'French', 'Est': '>60',\n", |
343 |
| - " 'GOAL': False},\n", |
344 |
| - "\n", |
345 |
| - " {'Alt': 'No', 'Bar': 'Yes', 'Fri': 'No', 'Hun': 'Yes', 'Pat': 'Some',\n", |
346 |
| - " 'Price': '$$', 'Rain': 'Yes', 'Res': 'Yes', 'Type': 'Italian', 'Est': '0-10',\n", |
347 |
| - " 'GOAL': True},\n", |
348 |
| - "\n", |
349 |
| - " {'Alt': 'No', 'Bar': 'Yes', 'Fri': 'No', 'Hun': 'No', 'Pat': 'None',\n", |
350 |
| - " 'Price': '$', 'Rain': 'Yes', 'Res': 'No', 'Type': 'Burger', 'Est': '0-10',\n", |
351 |
| - " 'GOAL': False},\n", |
352 |
| - "\n", |
353 |
| - " {'Alt': 'No', 'Bar': 'No', 'Fri': 'No', 'Hun': 'Yes', 'Pat': 'Some',\n", |
354 |
| - " 'Price': '$$', 'Rain': 'Yes', 'Res': 'Yes', 'Type': 'Thai', 'Est': '0-10',\n", |
355 |
| - " 'GOAL': True},\n", |
356 |
| - "\n", |
357 |
| - " {'Alt': 'No', 'Bar': 'Yes', 'Fri': 'Yes', 'Hun': 'No', 'Pat': 'Full',\n", |
358 |
| - " 'Price': '$', 'Rain': 'Yes', 'Res': 'No', 'Type': 'Burger', 'Est': '>60',\n", |
359 |
| - " 'GOAL': False},\n", |
360 |
| - "\n", |
361 |
| - " {'Alt': 'Yes', 'Bar': 'Yes', 'Fri': 'Yes', 'Hun': 'Yes', 'Pat': 'Full',\n", |
362 |
| - " 'Price': '$$$', 'Rain': 'No', 'Res': 'Yes', 'Type': 'Italian', 'Est': '10-30',\n", |
363 |
| - " 'GOAL': False},\n", |
364 |
| - "\n", |
365 |
| - " {'Alt': 'No', 'Bar': 'No', 'Fri': 'No', 'Hun': 'No', 'Pat': 'None',\n", |
366 |
| - " 'Price': '$', 'Rain': 'No', 'Res': 'No', 'Type': 'Thai', 'Est': '0-10',\n", |
367 |
| - " 'GOAL': False},\n", |
368 |
| - "\n", |
369 |
| - " {'Alt': 'Yes', 'Bar': 'Yes', 'Fri': 'Yes', 'Hun': 'Yes', 'Pat': 'Full',\n", |
370 |
| - " 'Price': '$', 'Rain': 'No', 'Res': 'No', 'Type': 'Burger', 'Est': '30-60',\n", |
371 |
| - " 'GOAL': True}\n", |
| 347 | + " r_example('Yes', 'No', 'No', 'Yes', 'Some', '$$$', 'No', 'Yes', 'French', '0-10', True),\n", |
| 348 | + " r_example('Yes', 'No', 'No', 'Yes', 'Full', '$', 'No', 'No', 'Thai', '30-60', False),\n", |
| 349 | + " r_example('No', 'Yes', 'No', 'No', 'Some', '$', 'No', 'No', 'Burger', '0-10', True),\n", |
| 350 | + " r_example('Yes', 'No', 'Yes', 'Yes', 'Full', '$', 'Yes', 'No', 'Thai', '10-30', True),\n", |
| 351 | + " r_example('Yes', 'No', 'Yes', 'No', 'Full', '$$$', 'No', 'Yes', 'French', '>60', False),\n", |
| 352 | + " r_example('No', 'Yes', 'No', 'Yes', 'Some', '$$', 'Yes', 'Yes', 'Italian', '0-10', True),\n", |
| 353 | + " r_example('No', 'Yes', 'No', 'No', 'None', '$', 'Yes', 'No', 'Burger', '0-10', False),\n", |
| 354 | + " r_example('No', 'No', 'No', 'Yes', 'Some', '$$', 'Yes', 'Yes', 'Thai', '0-10', True),\n", |
| 355 | + " r_example('No', 'Yes', 'Yes', 'No', 'Full', '$', 'Yes', 'No', 'Burger', '>60', False),\n", |
| 356 | + " r_example('Yes', 'Yes', 'Yes', 'Yes', 'Full', '$$$', 'No', 'Yes', 'Italian', '10-30', False),\n", |
| 357 | + " r_example('No', 'No', 'No', 'No', 'None', '$', 'No', 'No', 'Thai', '0-10', False),\n", |
| 358 | + " r_example('Yes', 'Yes', 'Yes', 'Yes', 'Full', '$', 'No', 'No', 'Burger', '30-60', True)\n", |
372 | 359 | "]"
|
373 | 360 | ]
|
374 | 361 | },
|
|
381 | 368 | },
|
382 | 369 | {
|
383 | 370 | "cell_type": "code",
|
384 |
| - "execution_count": 7, |
| 371 | + "execution_count": 8, |
385 | 372 | "metadata": {},
|
386 | 373 | "outputs": [
|
387 | 374 | {
|
|
419 | 406 | },
|
420 | 407 | {
|
421 | 408 | "cell_type": "code",
|
422 |
| - "execution_count": 8, |
| 409 | + "execution_count": 9, |
423 | 410 | "metadata": {},
|
424 | 411 | "outputs": [
|
425 | 412 | {
|
426 | 413 | "name": "stdout",
|
427 | 414 | "output_type": "stream",
|
428 | 415 | "text": [
|
429 |
| - "[{'Type': '!Thai', 'Fri': '!Yes', 'Alt': 'Yes'}, {'Fri': 'No', 'Type': 'Burger', 'Pat': '!None', 'Alt': 'No'}, {'Fri': 'Yes', 'Est': '10-30', 'Pat': 'Full', 'Rain': 'Yes', 'Res': 'No', 'Bar': 'No', 'Price': '$'}, {'Fri': 'No', 'Est': '0-10', 'Pat': 'Some', 'Res': 'Yes', 'Type': 'Italian', 'Alt': 'No'}, {'Fri': 'No', 'Pat': 'Some', 'Res': 'Yes', 'Type': 'Thai', 'Hun': 'Yes', 'Alt': 'No', 'Price': '$$'}, {'Fri': 'Yes', 'Pat': 'Full', 'Rain': 'No', 'Alt': 'Yes', 'Type': 'Burger', 'Hun': 'Yes', 'Bar': 'Yes', 'Price': '$'}]\n" |
| 416 | + "[{'Res': '!No', 'Fri': '!Yes', 'Alt': 'Yes'}, {'Bar': 'Yes', 'Fri': 'No', 'Rain': 'No', 'Hun': 'No'}, {'Bar': 'No', 'Price': '$', 'Fri': 'Yes'}, {'Res': 'Yes', 'Price': '$$', 'Rain': 'Yes', 'Alt': 'No', 'Est': '0-10', 'Fri': 'No', 'Hun': 'Yes', 'Bar': 'Yes'}, {'Fri': 'No', 'Pat': 'Some', 'Price': '$$', 'Rain': 'Yes', 'Hun': 'Yes'}, {'Est': '30-60', 'Res': 'No', 'Price': '$', 'Fri': 'Yes', 'Hun': 'Yes'}]\n" |
430 | 417 | ]
|
431 | 418 | }
|
432 | 419 | ],
|
|
440 | 427 | "source": [
|
441 | 428 | "It might be quite complicated, with many disjunctions if we are unlucky, but it will always be correct, as long as a correct hypothesis exists."
|
442 | 429 | ]
|
| 430 | + }, |
| 431 | + { |
| 432 | + "cell_type": "markdown", |
| 433 | + "metadata": {}, |
| 434 | + "source": [ |
| 435 | + "## [VERSION-SPACE LEARNING](https://github.com/aimacode/aima-pseudocode/blob/master/md/Version-Space-Learning.md)\n", |
| 436 | + "\n", |
| 437 | + "### Overview\n", |
| 438 | + "\n", |
| 439 | + "**Version-Space Learning** is a general method of learning in logic-based domains. We generate the set of all the possible hypotheses in the domain and then we iteratively remove hypotheses inconsistent with the examples. The set of remaining hypotheses is called the **version space**. Because hypotheses are being removed until we end up with a set of hypotheses consistent with all the examples, the algorithm is sometimes called the **candidate elimination** algorithm.\n", |
| 440 | + "\n", |
| 441 | + "After we update the set on an example, all the hypotheses in the set are consistent with that example. So, when all the examples have been processed, all the remaining hypotheses in the set are consistent with all the examples. That means we can pick any hypothesis from the set at random and it will always be a valid hypothesis." |
| 442 | + ] |
| 443 | + }, |
| 444 | + { |
| 445 | + "cell_type": "markdown", |
| 446 | + "metadata": { |
| 447 | + "collapsed": true |
| 448 | + }, |
| 449 | + "source": [ |
| 450 | + "### Implementation\n", |
| 451 | + "\n", |
| 452 | + "The set of hypotheses is represented by a list and each hypothesis is represented by a list of dictionaries, with each dictionary representing a disjunction. For each example in the given examples we update the version space with the function `version_space_update`. In the end, we return the version space.\n", |
| 453 | + "\n", |
| 454 | + "Before we can start updating the version space, we need to generate it. We do that with the `all_hypotheses` function, which builds a list of all the possible hypotheses (including hypotheses with disjunctions). The function works like this: first it finds the possible values for each attribute (using `values_table`), then it builds all the attribute combinations (and adds them to the hypotheses set) and finally it builds the combinations of all the disjunctions (which in this case are the hypotheses built from the attribute combinations).\n", |
| 455 | + "\n", |
| 456 | + "You can read the code for all the functions by running the cells below:" |
| 457 | + ] |
| 458 | + }, |
| 459 | + { |
| 460 | + "cell_type": "code", |
| 461 | + "execution_count": 2, |
| 462 | + "metadata": { |
| 463 | + "collapsed": true |
| 464 | + }, |
| 465 | + "outputs": [], |
| 466 | + "source": [ |
| 467 | + "%psource version_space_learning" |
| 468 | + ] |
| 469 | + }, |
| 470 | + { |
| 471 | + "cell_type": "code", |
| 472 | + "execution_count": 3, |
| 473 | + "metadata": { |
| 474 | + "collapsed": true |
| 475 | + }, |
| 476 | + "outputs": [], |
| 477 | + "source": [ |
| 478 | + "%psource version_space_update" |
| 479 | + ] |
| 480 | + }, |
| 481 | + { |
| 482 | + "cell_type": "code", |
| 483 | + "execution_count": 4, |
| 484 | + "metadata": { |
| 485 | + "collapsed": true |
| 486 | + }, |
| 487 | + "outputs": [], |
| 488 | + "source": [ |
| 489 | + "%psource all_hypotheses" |
| 490 | + ] |
| 491 | + }, |
| 492 | + { |
| 493 | + "cell_type": "code", |
| 494 | + "execution_count": 5, |
| 495 | + "metadata": { |
| 496 | + "collapsed": true |
| 497 | + }, |
| 498 | + "outputs": [], |
| 499 | + "source": [ |
| 500 | + "%psource values_table" |
| 501 | + ] |
| 502 | + }, |
| 503 | + { |
| 504 | + "cell_type": "code", |
| 505 | + "execution_count": 6, |
| 506 | + "metadata": { |
| 507 | + "collapsed": true |
| 508 | + }, |
| 509 | + "outputs": [], |
| 510 | + "source": [ |
| 511 | + "%psource build_attr_combinations" |
| 512 | + ] |
| 513 | + }, |
| 514 | + { |
| 515 | + "cell_type": "code", |
| 516 | + "execution_count": 7, |
| 517 | + "metadata": { |
| 518 | + "collapsed": true |
| 519 | + }, |
| 520 | + "outputs": [], |
| 521 | + "source": [ |
| 522 | + "%psource build_h_combinations" |
| 523 | + ] |
| 524 | + }, |
| 525 | + { |
| 526 | + "cell_type": "markdown", |
| 527 | + "metadata": {}, |
| 528 | + "source": [ |
| 529 | + "### Example\n", |
| 530 | + "\n", |
| 531 | + "Since the set of all possible hypotheses is enormous and would take a long time to generate, we will come up with another, even smaller domain. We will try and predict whether we will have a party or not given the availability of pizza and soda. Let's do it:" |
| 532 | + ] |
| 533 | + }, |
| 534 | + { |
| 535 | + "cell_type": "code", |
| 536 | + "execution_count": 8, |
| 537 | + "metadata": { |
| 538 | + "collapsed": true |
| 539 | + }, |
| 540 | + "outputs": [], |
| 541 | + "source": [ |
| 542 | + "party = [\n", |
| 543 | + " {'Pizza': 'Yes', 'Soda': 'No', 'GOAL': True},\n", |
| 544 | + " {'Pizza': 'Yes', 'Soda': 'Yes', 'GOAL': True},\n", |
| 545 | + " {'Pizza': 'No', 'Soda': 'No', 'GOAL': False}\n", |
| 546 | + "]" |
| 547 | + ] |
| 548 | + }, |
| 549 | + { |
| 550 | + "cell_type": "markdown", |
| 551 | + "metadata": {}, |
| 552 | + "source": [ |
| 553 | + "Even though it is obvious that no-pizza no-party, we will run the algorithm and see what other hypotheses are valid." |
| 554 | + ] |
| 555 | + }, |
| 556 | + { |
| 557 | + "cell_type": "code", |
| 558 | + "execution_count": 12, |
| 559 | + "metadata": {}, |
| 560 | + "outputs": [ |
| 561 | + { |
| 562 | + "name": "stdout", |
| 563 | + "output_type": "stream", |
| 564 | + "text": [ |
| 565 | + "True\n", |
| 566 | + "True\n", |
| 567 | + "False\n" |
| 568 | + ] |
| 569 | + } |
| 570 | + ], |
| 571 | + "source": [ |
| 572 | + "V = version_space_learning(party)\n", |
| 573 | + "for e in party:\n", |
| 574 | + " guess = False\n", |
| 575 | + " for h in V:\n", |
| 576 | + " if guess_value(e, h):\n", |
| 577 | + " guess = True\n", |
| 578 | + " break\n", |
| 579 | + "\n", |
| 580 | + " print(guess)" |
| 581 | + ] |
| 582 | + }, |
| 583 | + { |
| 584 | + "cell_type": "markdown", |
| 585 | + "metadata": {}, |
| 586 | + "source": [ |
| 587 | + "The results are correct for the given examples. Let's take a look at the version space:" |
| 588 | + ] |
| 589 | + }, |
| 590 | + { |
| 591 | + "cell_type": "code", |
| 592 | + "execution_count": 17, |
| 593 | + "metadata": {}, |
| 594 | + "outputs": [ |
| 595 | + { |
| 596 | + "name": "stdout", |
| 597 | + "output_type": "stream", |
| 598 | + "text": [ |
| 599 | + "959\n", |
| 600 | + "[{'Pizza': 'Yes'}, {'Soda': 'Yes'}]\n", |
| 601 | + "[{'Pizza': 'Yes'}, {'Pizza': '!No', 'Soda': 'No'}]\n", |
| 602 | + "True\n" |
| 603 | + ] |
| 604 | + } |
| 605 | + ], |
| 606 | + "source": [ |
| 607 | + "print(len(V))\n", |
| 608 | + "\n", |
| 609 | + "print(V[5])\n", |
| 610 | + "print(V[10])\n", |
| 611 | + "\n", |
| 612 | + "print([{'Pizza': 'Yes'}] in V)" |
| 613 | + ] |
| 614 | + }, |
| 615 | + { |
| 616 | + "cell_type": "markdown", |
| 617 | + "metadata": {}, |
| 618 | + "source": [ |
| 619 | + "There are almost 1000 hypotheses in the set. You can see that even with just two attributes the version space is very large.\n", |
| 620 | + "\n", |
| 621 | + "Our initial prediction is indeed in the set of hypotheses. Also, the two other hypotheses we printed are consistent with the examples (since they both include the \"Pizza is available\" disjunction)." |
| 622 | + ] |
443 | 623 | }
|
444 | 624 | ],
|
445 | 625 | "metadata": {
|
|
0 commit comments