Day 5

mjpieters · mjpieters · commit 882b239fd700 · 2023-12-05T17:43:19.000Z
diff --git a/2023/Day 05.ipynb b/2023/Day 05.ipynb
@@ -0,0 +1,256 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# On the range\n",
+    "\n",
+    "https://adventofcode.com/2023/day/5\n",
+    "\n",
+    "Today's puzzle is all about ranges. Don't store each possible individual value, because the puzzle input uses the full 32-bit unsigned integer range!\n",
+    "\n",
+    "Instead, store just a tuple of the source, the length and the destination values. If you keep the list of ranges sorted by their starting points, you can then use [bisection](https://docs.python.org/3/library/bisect.html) to quickly find a matching source range and verify that the value being mapped falls inside that range. If it does, map the value to the destination; if it doesn't, return the original value.\n",
+    "\n",
+    "The implementation for part one not only returns the mapped value, but also how many values remain in the range that was used to map the source value to the destination. This is used in part two.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import typing as t\n",
+    "from bisect import bisect\n",
+    "from dataclasses import dataclass\n",
+    "from operator import itemgetter\n",
+    "\n",
+    "\n",
+    "@dataclass\n",
+    "class AlmanacMap:\n",
+    "    from_: str\n",
+    "    to_: str\n",
+    "    ranges: list[tuple[int, int, int]]\n",
+    "\n",
+    "    @classmethod\n",
+    "    def from_entry(cls, entry: str) -> t.Self:\n",
+    "        first, *lines = entry.splitlines()\n",
+    "        from_, _, to_ = first.partition(\" \")[0].partition(\"-to-\")\n",
+    "        ranges = [\n",
+    "            (int(src), int(length), int(dst))\n",
+    "            for dst, src, length in map(str.split, lines)\n",
+    "        ]\n",
+    "        return cls(from_, to_, sorted(ranges, key=itemgetter(0)))\n",
+    "\n",
+    "    def __getitem__(self, value: int) -> tuple[int, int | None]:\n",
+    "        \"\"\"Map a value through the almanac table\n",
+    "\n",
+    "        Returns the new value, and the remaining length of the source section it\n",
+    "        was mapped through, or None if the value lies outside the maximum value\n",
+    "        of the table.\n",
+    "\n",
+    "        \"\"\"\n",
+    "        if (idx := bisect(self.ranges, value, key=itemgetter(0))) > 0:\n",
+    "            src, length, dst = self.ranges[idx - 1]\n",
+    "            if (offset := value - src) < length:\n",
+    "                return dst + offset, length - offset\n",
+    "        if idx < len(self.ranges):\n",
+    "            return value, self.ranges[idx][0] - value\n",
+    "        return value, None\n",
+    "\n",
+    "\n",
+    "@dataclass\n",
+    "class Almanac:\n",
+    "    seeds: list[int]\n",
+    "    maps: dict[str, AlmanacMap]\n",
+    "\n",
+    "    @classmethod\n",
+    "    def from_entries(cls, *entries: str) -> t.Self:\n",
+    "        seeds_line, *entries = entries\n",
+    "        seeds = [int(seed) for seed in seeds_line.partition(\": \")[-1].split()]\n",
+    "        maps = {map_.from_: map_ for map_ in map(AlmanacMap.from_entry, entries)}\n",
+    "        return cls(seeds, maps)\n",
+    "\n",
+    "    def __getitem__(self, seed: int) -> int:\n",
+    "        current = \"seed\"\n",
+    "        value = seed\n",
+    "        while current != \"location\":\n",
+    "            map_ = self.maps[current]\n",
+    "            current = map_.to_\n",
+    "            value, _ = map_[value]\n",
+    "        return value\n",
+    "\n",
+    "\n",
+    "test_almanac_text = \"\"\"\\\n",
+    "seeds: 79 14 55 13\n",
+    "\n",
+    "seed-to-soil map:\n",
+    "50 98 2\n",
+    "52 50 48\n",
+    "\n",
+    "soil-to-fertilizer map:\n",
+    "0 15 37\n",
+    "37 52 2\n",
+    "39 0 15\n",
+    "\n",
+    "fertilizer-to-water map:\n",
+    "49 53 8\n",
+    "0 11 42\n",
+    "42 0 7\n",
+    "57 7 4\n",
+    "\n",
+    "water-to-light map:\n",
+    "88 18 7\n",
+    "18 25 70\n",
+    "\n",
+    "light-to-temperature map:\n",
+    "45 77 23\n",
+    "81 45 19\n",
+    "68 64 13\n",
+    "\n",
+    "temperature-to-humidity map:\n",
+    "0 69 1\n",
+    "1 0 69\n",
+    "\n",
+    "humidity-to-location map:\n",
+    "60 56 37\n",
+    "56 93 4\n",
+    "\"\"\"\n",
+    "test_almanac = Almanac.from_entries(*test_almanac_text.split(\"\\n\\n\"))\n",
+    "assert min(test_almanac[seed] for seed in test_almanac.seeds) == 35"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Part 1: 382895070\n"
+     ]
+    }
+   ],
+   "source": [
+    "import aocd\n",
+    "\n",
+    "almanac = Almanac.from_entries(*aocd.get_data(day=5, year=2023).split(\"\\n\\n\"))\n",
+    "print(\"Part 1:\", min(almanac[seed] for seed in almanac.seeds))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# The green revolution is here!\n",
+    "\n",
+    "Part 2 just scales up part one. Luckily we are already using bisection to handle the lookups; it's the fastest way to handle any given seed lookup!\n",
+    "\n",
+    "However, there are still an _awful lot of seeds_ to process here. The total length of my puzzle input seed ranges covers more than 2 billion values. Even if you can map a given seed value to its location in 1 microsecond, it would still take about 40 minutes to map this many seeds to locations.\n",
+    "\n",
+    "Instead of mapping individual values, we could map ranges; any given range might need to be split up by each map as they won't all be using the same mapping entries, but the splitting can be done entirely based on the lengths of the source ranges. This cuts down the amount of work significantly.\n",
+    "\n",
+    "I first refactored the code for part one to not only return the mapped value, but also the remaining length in the source range. The extra return value is not used anywhere else in part one, but in part two we can use this to then split up source ranges. The almanac then only has to return the lowest start value among the ranges produced by the final mapping.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from collections import deque\n",
+    "\n",
+    "\n",
+    "class RangeAlmanacMap(AlmanacMap):\n",
+    "    def __getitem__(self, values: tuple[range, ...]) -> tuple[range, ...]:\n",
+    "        results = []\n",
+    "        queue = deque(values)\n",
+    "        while queue:\n",
+    "            value = queue.popleft()\n",
+    "            size = len(value)\n",
+    "            dst, remainder = super().__getitem__(value.start)\n",
+    "            if remainder and size > remainder:\n",
+    "                # process the section that doesn't fit\n",
+    "                queue.append(value[remainder:])\n",
+    "                size = remainder\n",
+    "            # map the part of the range that fits\n",
+    "            results.append(range(dst, dst + size))\n",
+    "        return tuple(results)\n",
+    "\n",
+    "\n",
+    "@dataclass\n",
+    "class RangeAlmanac(Almanac):\n",
+    "    maps: dict[str, RangeAlmanacMap]\n",
+    "\n",
+    "    @classmethod\n",
+    "    def from_entries(cls, *entries: str) -> t.Self:\n",
+    "        inst = super().from_entries(*entries)\n",
+    "        inst.maps = {\n",
+    "            to_: RangeAlmanacMap(**vars(map_)) for to_, map_ in inst.maps.items()\n",
+    "        }\n",
+    "        return inst\n",
+    "\n",
+    "    def __getitem__(self, values: tuple[range, ...]) -> int:\n",
+    "        current = \"seed\"\n",
+    "        while current != \"location\":\n",
+    "            map_ = self.maps[current]\n",
+    "            current = map_.to_\n",
+    "            values = map_[values]\n",
+    "        return min(v.start for v in values)\n",
+    "\n",
+    "\n",
+    "def seed_ranges(*seeds: int) -> t.Iterator[range]:\n",
+    "    it = iter(seeds)\n",
+    "    for start, length in zip(it, it):\n",
+    "        yield range(start, start + length)\n",
+    "\n",
+    "\n",
+    "test_almanac = RangeAlmanac.from_entries(*test_almanac_text.split(\"\\n\\n\"))\n",
+    "assert test_almanac[tuple(seed_ranges(*test_almanac.seeds))] == 46"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Part 2: 17729182\n"
+     ]
+    }
+   ],
+   "source": [
+    "almanac = RangeAlmanac.from_entries(*aocd.get_data(day=5, year=2023).split(\"\\n\\n\"))\n",
+    "print(\"Part 2:\", almanac[tuple(seed_ranges(*almanac.seeds))])"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "adventofcode-bRnAxXn--py3.12",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}