Add files via upload

2021-02-23 12:00:02 -08:00
parent d649cf5c52
commit c4f9ae331a
1 changed files with 101 additions and 37 deletions
--- a/ipynb/SpellingBee.ipynb
+++ b/ipynb/SpellingBee.ipynb
@@ -63,10 +63,13 @@
   "source": [
    "Word = str # Type for a word\n",
    "\n",
-    "def valid_words(text) -> List[Word]:\n",
-    "    \"\"\"Words with at least 4 letters, no 'S', and no more than 7 distinct letters.\"\"\"\n",
-    "    return [w for w in text.upper().split() \n",
-    "            if len(w) >= 4 and 'S' not in w and len(set(w)) <= 7]\n",
+    "def valid(word) -> bool:\n",
+    "    \"\"\"Does word have at least 4 letters, no 'S', and no more than 7 distinct letters?\"\"\"\n",
+    "    return len(word) >= 4 and 'S' not in word and len(set(word)) <= 7\n",
+    "\n",
+    "def valid_words(text, valid=valid) -> List[Word]: \n",
+    "    \"\"\"All the valid words in text.\"\"\"\n",
+    "    return [w for w in text.upper().split() if valid(w)]\n",
    "\n",
    "def pangram_bonus(word) -> int: \n",
    "    \"\"\"Does a word get a bonus for having 7 distinct letters (some maybe more than once)?\"\"\"\n",
@@ -356,20 +359,20 @@
    {
     "data": {
      "text/plain": [
-       "[Honeycomb(letters='ACEIORT', center='A'),\n",
-       " Honeycomb(letters='ACEIORT', center='C'),\n",
-       " Honeycomb(letters='ACEIORT', center='E'),\n",
-       " Honeycomb(letters='ACEIORT', center='I'),\n",
-       " Honeycomb(letters='ACEIORT', center='O'),\n",
-       " Honeycomb(letters='ACEIORT', center='R'),\n",
-       " Honeycomb(letters='ACEIORT', center='T'),\n",
-       " Honeycomb(letters='AEGLMPX', center='A'),\n",
+       "[Honeycomb(letters='AEGLMPX', center='A'),\n",
       " Honeycomb(letters='AEGLMPX', center='E'),\n",
       " Honeycomb(letters='AEGLMPX', center='G'),\n",
       " Honeycomb(letters='AEGLMPX', center='L'),\n",
       " Honeycomb(letters='AEGLMPX', center='M'),\n",
       " Honeycomb(letters='AEGLMPX', center='P'),\n",
-       " Honeycomb(letters='AEGLMPX', center='X')]"
+       " Honeycomb(letters='AEGLMPX', center='X'),\n",
+       " Honeycomb(letters='ACEIORT', center='A'),\n",
+       " Honeycomb(letters='ACEIORT', center='C'),\n",
+       " Honeycomb(letters='ACEIORT', center='E'),\n",
+       " Honeycomb(letters='ACEIORT', center='I'),\n",
+       " Honeycomb(letters='ACEIORT', center='O'),\n",
+       " Honeycomb(letters='ACEIORT', center='R'),\n",
+       " Honeycomb(letters='ACEIORT', center='T')]"
      ]
     },
     "execution_count": 13,
@@ -536,8 +539,8 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "CPU times: user 9.77 ms, sys: 35 µs, total: 9.8 ms\n",
-      "Wall time: 9.8 ms\n"
+      "CPU times: user 9.27 ms, sys: 37 µs, total: 9.31 ms\n",
+      "Wall time: 9.3 ms\n"
     ]
    },
    {
@@ -579,7 +582,7 @@
    }
   ],
   "source": [
-    ".01 * 55902 / 60"
+    "55902 * 10/1000 / 60"
   ]
  },
  {
@@ -757,8 +760,8 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "CPU times: user 1.81 s, sys: 2.31 ms, total: 1.82 s\n",
-      "Wall time: 1.82 s\n"
+      "CPU times: user 1.81 s, sys: 3 ms, total: 1.81 s\n",
+      "Wall time: 1.81 s\n"
     ]
    },
    {
@@ -840,7 +843,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "def best_honeycomb(words) -> Honeycomb: \n",
+    "def best_honeycomb2(words) -> Honeycomb: \n",
    "    \"\"\"Return a honeycomb with highest game score on these words.\"\"\"\n",
    "    points_table = tabulate_points(words)\n",
    "    best, best_score = None, 0\n",
@@ -864,8 +867,8 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "CPU times: user 406 ms, sys: 1.13 ms, total: 407 ms\n",
-      "Wall time: 406 ms\n"
+      "CPU times: user 408 ms, sys: 1.74 ms, total: 410 ms\n",
+      "Wall time: 409 ms\n"
     ]
    },
    {
@@ -880,7 +883,7 @@
    }
   ],
   "source": [
-    "%time best_honeycomb(enable1)"
+    "%time best_honeycomb2(enable1)"
   ]
  },
  {
@@ -987,7 +990,7 @@
    {
     "data": {
      "text/plain": [
-       "[('has S', 103913), ('valid', 44585), ('> 7', 23400), ('< 4', 922)]"
+       "Counter({'< 4': 922, 'valid': 44585, 'has S': 103913, '> 7': 23400})"
      ]
     },
     "execution_count": 33,
@@ -1000,7 +1003,7 @@
    "        '< 4'   if len(w) < 4 else \n",
    "        '> 7'   if len(set(w)) > 7 else \n",
    "        'valid'\n",
-    "        for w in open('enable1.txt').read().upper().split()).most_common()"
+    "        for w in valid_words(open('enable1.txt').read(), lambda w: True))"
   ]
  },
  {
@@ -1075,7 +1078,68 @@
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "The best honeycomb is also the highest scoring letter subset on its own (although it only gets 832 of the 3,898 total points from using all seven letters)."
+    "The best honeycomb is also the highest scoring letter subset on its own (although it only gets 832 of the 3,898 total points from using all seven letters).\n",
+    "\n",
+    "### How many honeycombs does `best_honeycomb2` consider?\n",
+    "\n",
+    "We know that `best_honeycomb` considers 7,986 × 7 = 55,902 honeycombs. How many does `best_honeycomb2` consider? We can answer that by wrapping `Honeycomb` with a decorator that counts calls:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 36,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(Honeycomb(letters='AEGINRT', center='R'), 8084)"
+      ]
+     },
+     "execution_count": 36,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "def call_counter(fn):\n",
+    "    \"Return a function that calls fn, and increments a counter on each call.\"\n",
+    "    def wrapped(*args, **kwds):\n",
+    "        wrapped.call_counter += 1\n",
+    "        return fn(*args, **kwds)\n",
+    "    wrapped.call_counter = 0\n",
+    "    return wrapped\n",
+    "    \n",
+    "Honeycomb = call_counter(Honeycomb)\n",
+    "\n",
+    "best_honeycomb2(enable1), Honeycomb.call_counter"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 37,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "14.0"
+      ]
+     },
+     "execution_count": 37,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "(8084 - 7986) / 7"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "That means that most pangram lettersets are only considered once; for only 14 of the 7,986 lettersets do we go on to consider all seven centers."
   ]
  },
  {
@@ -1089,7 +1153,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 36,
+   "execution_count": 38,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -1131,7 +1195,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 37,
+   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
@@ -1156,7 +1220,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 38,
+   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
@@ -1338,7 +1402,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 39,
+   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
@@ -1347,14 +1411,14 @@
       "(98141, 44585)"
      ]
     },
-     "execution_count": 39,
+     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
-    "enable1s = [w for w in open('enable1.txt').read().upper().split() \n",
-    "           if len(w) >= 4 and len(set(w)) <= 7]\n",
+    "enable1s = valid_words(open('enable1.txt').read(), \n",
+    "                       lambda w: len(w) >= 4 and len(set(w)) <= 7)\n",
    "\n",
    "len(enable1s), len(enable1)"
   ]
@@ -1368,7 +1432,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 40,
+   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
@@ -1664,10 +1728,10 @@
    "\n",
    "Thanks to a series of ideas, we were able to achieve a substantial reduction in the number of honeycombs that need to be examined (a factor of 400), the run time needed for `game_score` (a factor of about 200), and the overall run time (a factor of about 70,000).\n",
    "\n",
-    "- **Enumeration (10 hours (estimate) run time; 3,364,900 honeycombs)**<br>Try every possible honeycomb.\n",
-    "- **Pangram Lettersets (10 minutes (estimate) run time; 55,902 honeycombs)**<br>Try just the honeycombs that are pangram lettersets (with every center).\n",
-    "- **Points Table (under 2 seconds run time; 55,902 honeycombs)**<br>Precompute the score for each letterset, and sum the 64 letter subsets of each honeycomb.\n",
-    "- **Branch and Bound (under 1/2 second run time; 8,084 honeycombs)**<br>Try every center only for lettersets that score better than the best score so far.\n",
+    "- **Enumeration (3,364,900 honeycombs; estimated 10 hours run time)**<br>Try every possible honeycomb.\n",
+    "- **Pangram Lettersets (55,902 honeycombs; estimated 10 minutes run time)**<br>Try just the honeycombs that are pangram lettersets (with every center).\n",
+    "- **Points Table (55,902 honeycombs; under 2 seconds run time)**<br>Precompute the score for each letterset, and sum the 64 letter subsets of each honeycomb.\n",
+    "- **Branch and Bound (8,084 honeycombs; under 1/2 second run time)**<br>Try every center only for lettersets that score better than the best score so far.\n",
    "\n",
    "\n",
    "\n",