diff --git a/ipynb/SpellingBee.ipynb b/ipynb/SpellingBee.ipynb
index 502551d..8eca526 100644
--- a/ipynb/SpellingBee.ipynb
+++ b/ipynb/SpellingBee.ipynb
@@ -63,10 +63,13 @@
"source": [
"Word = str # Type for a word\n",
"\n",
- "def valid_words(text) -> List[Word]:\n",
- " \"\"\"Words with at least 4 letters, no 'S', and no more than 7 distinct letters.\"\"\"\n",
- " return [w for w in text.upper().split() \n",
- " if len(w) >= 4 and 'S' not in w and len(set(w)) <= 7]\n",
+    "def valid(word) -> bool:\n",
+    "    \"\"\"Is word playable: 4 or more letters, free of 'S', and built from at most 7 distinct letters?\"\"\"\n",
+    "    return not (len(word) < 4 or 'S' in word or len(set(word)) > 7)\n",
+ "\n",
+    "def valid_words(text, valid=valid) -> List[Word]:\n",
+    "    \"\"\"Upper-case text, split it on whitespace, and keep each word accepted by the `valid` predicate.\"\"\"\n",
+    "    return [word for word in text.upper().split() if valid(word)]\n",
"\n",
"def pangram_bonus(word) -> int: \n",
" \"\"\"Does a word get a bonus for having 7 distinct letters (some maybe more than once)?\"\"\"\n",
@@ -356,20 +359,20 @@
{
"data": {
"text/plain": [
- "[Honeycomb(letters='ACEIORT', center='A'),\n",
- " Honeycomb(letters='ACEIORT', center='C'),\n",
- " Honeycomb(letters='ACEIORT', center='E'),\n",
- " Honeycomb(letters='ACEIORT', center='I'),\n",
- " Honeycomb(letters='ACEIORT', center='O'),\n",
- " Honeycomb(letters='ACEIORT', center='R'),\n",
- " Honeycomb(letters='ACEIORT', center='T'),\n",
- " Honeycomb(letters='AEGLMPX', center='A'),\n",
+ "[Honeycomb(letters='AEGLMPX', center='A'),\n",
" Honeycomb(letters='AEGLMPX', center='E'),\n",
" Honeycomb(letters='AEGLMPX', center='G'),\n",
" Honeycomb(letters='AEGLMPX', center='L'),\n",
" Honeycomb(letters='AEGLMPX', center='M'),\n",
" Honeycomb(letters='AEGLMPX', center='P'),\n",
- " Honeycomb(letters='AEGLMPX', center='X')]"
+ " Honeycomb(letters='AEGLMPX', center='X'),\n",
+ " Honeycomb(letters='ACEIORT', center='A'),\n",
+ " Honeycomb(letters='ACEIORT', center='C'),\n",
+ " Honeycomb(letters='ACEIORT', center='E'),\n",
+ " Honeycomb(letters='ACEIORT', center='I'),\n",
+ " Honeycomb(letters='ACEIORT', center='O'),\n",
+ " Honeycomb(letters='ACEIORT', center='R'),\n",
+ " Honeycomb(letters='ACEIORT', center='T')]"
]
},
"execution_count": 13,
@@ -536,8 +539,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "CPU times: user 9.77 ms, sys: 35 µs, total: 9.8 ms\n",
- "Wall time: 9.8 ms\n"
+ "CPU times: user 9.27 ms, sys: 37 µs, total: 9.31 ms\n",
+ "Wall time: 9.3 ms\n"
]
},
{
@@ -579,7 +582,7 @@
}
],
"source": [
- ".01 * 55902 / 60"
+ "55902 * 10/1000 / 60"
]
},
{
@@ -757,8 +760,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "CPU times: user 1.81 s, sys: 2.31 ms, total: 1.82 s\n",
- "Wall time: 1.82 s\n"
+ "CPU times: user 1.81 s, sys: 3 ms, total: 1.81 s\n",
+ "Wall time: 1.81 s\n"
]
},
{
@@ -840,7 +843,7 @@
"metadata": {},
"outputs": [],
"source": [
- "def best_honeycomb(words) -> Honeycomb: \n",
+ "def best_honeycomb2(words) -> Honeycomb: \n",
" \"\"\"Return a honeycomb with highest game score on these words.\"\"\"\n",
" points_table = tabulate_points(words)\n",
" best, best_score = None, 0\n",
@@ -864,8 +867,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "CPU times: user 406 ms, sys: 1.13 ms, total: 407 ms\n",
- "Wall time: 406 ms\n"
+ "CPU times: user 408 ms, sys: 1.74 ms, total: 410 ms\n",
+ "Wall time: 409 ms\n"
]
},
{
@@ -880,7 +883,7 @@
}
],
"source": [
- "%time best_honeycomb(enable1)"
+ "%time best_honeycomb2(enable1)"
]
},
{
@@ -987,7 +990,7 @@
{
"data": {
"text/plain": [
- "[('has S', 103913), ('valid', 44585), ('> 7', 23400), ('< 4', 922)]"
+ "Counter({'< 4': 922, 'valid': 44585, 'has S': 103913, '> 7': 23400})"
]
},
"execution_count": 33,
@@ -1000,7 +1003,7 @@
" '< 4' if len(w) < 4 else \n",
" '> 7' if len(set(w)) > 7 else \n",
" 'valid'\n",
- " for w in open('enable1.txt').read().upper().split()).most_common()"
+ " for w in valid_words(open('enable1.txt').read(), lambda w: True))"
]
},
{
@@ -1075,7 +1078,68 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "The best honeycomb is also the highest scoring letter subset on its own (although it only gets 832 of the 3,898 total points from using all seven letters)."
+ "The best honeycomb is also the highest scoring letter subset on its own (although it only gets 832 of the 3,898 total points from using all seven letters).\n",
+ "\n",
+ "### How many honeycombs does `best_honeycomb2` consider?\n",
+ "\n",
+    "We know that `best_honeycomb` considers 7,986 pangram lettersets × 7 centers = 55,902 honeycombs. How many does `best_honeycomb2` consider? We can answer that by wrapping `Honeycomb` with a decorator that counts calls:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(Honeycomb(letters='AEGINRT', center='R'), 8084)"
+ ]
+ },
+ "execution_count": 36,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+    "def call_counter(fn):\n",
+    "    \"\"\"Wrap fn in a function that bumps its own `call_counter` attribute on every call, then delegates to fn.\"\"\"\n",
+    "    def wrapped(*args, **kwargs):\n",
+    "        wrapped.call_counter = wrapped.call_counter + 1\n",
+    "        return fn(*args, **kwargs)\n",
+    "    setattr(wrapped, 'call_counter', 0)\n",
+    "    return wrapped\n",
+ " \n",
+ "Honeycomb = call_counter(Honeycomb)\n",
+ "\n",
+ "best_honeycomb2(enable1), Honeycomb.call_counter"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "14.0"
+ ]
+ },
+ "execution_count": 37,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "(8084 - 7986) / 7"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+    "That means that most pangram lettersets are considered only once: each of the 7,986 lettersets is examined one time, and for only 14 of them do we go on to consider all seven centers (7,986 + 14 × 7 = 8,084)."
]
},
{
@@ -1089,7 +1153,7 @@
},
{
"cell_type": "code",
- "execution_count": 36,
+ "execution_count": 38,
"metadata": {},
"outputs": [],
"source": [
@@ -1131,7 +1195,7 @@
},
{
"cell_type": "code",
- "execution_count": 37,
+ "execution_count": 39,
"metadata": {},
"outputs": [
{
@@ -1156,7 +1220,7 @@
},
{
"cell_type": "code",
- "execution_count": 38,
+ "execution_count": 40,
"metadata": {},
"outputs": [
{
@@ -1338,7 +1402,7 @@
},
{
"cell_type": "code",
- "execution_count": 39,
+ "execution_count": 41,
"metadata": {},
"outputs": [
{
@@ -1347,14 +1411,14 @@
"(98141, 44585)"
]
},
- "execution_count": 39,
+ "execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "enable1s = [w for w in open('enable1.txt').read().upper().split() \n",
- " if len(w) >= 4 and len(set(w)) <= 7]\n",
+ "enable1s = valid_words(open('enable1.txt').read(), \n",
+ " lambda w: len(w) >= 4 and len(set(w)) <= 7)\n",
"\n",
"len(enable1s), len(enable1)"
]
@@ -1368,7 +1432,7 @@
},
{
"cell_type": "code",
- "execution_count": 40,
+ "execution_count": 42,
"metadata": {},
"outputs": [
{
@@ -1664,10 +1728,10 @@
"\n",
"Thanks to a series of ideas, we were able to achieve a substantial reduction in the number of honeycombs that need to be examined (a factor of 400), the run time needed for `game_score` (a factor of about 200), and the overall run time (a factor of about 70,000).\n",
"\n",
- "- **Enumeration (10 hours (estimate) run time; 3,364,900 honeycombs)**
Try every possible honeycomb.\n",
- "- **Pangram Lettersets (10 minutes (estimate) run time; 55,902 honeycombs)**
Try just the honeycombs that are pangram lettersets (with every center).\n",
- "- **Points Table (under 2 seconds run time; 55,902 honeycombs)**
Precompute the score for each letterset, and sum the 64 letter subsets of each honeycomb.\n",
- "- **Branch and Bound (under 1/2 second run time; 8,084 honeycombs)**
Try every center only for lettersets that score better than the best score so far.\n",
+ "- **Enumeration (3,364,900 honeycombs; 10 hours (estimate) run time)**
Try every possible honeycomb.\n",
+ "- **Pangram Lettersets (55,902 honeycombs; 10 minutes (estimate) run time)**
Try just the honeycombs that are pangram lettersets (with every center).\n",
+ "- **Points Table (55,902 honeycombs; under 2 seconds run time)**
Precompute the score for each letterset, and sum the 64 letter subsets of each honeycomb.\n",
+ "- **Branch and Bound (8,084 honeycombs; under 1/2 second run time)**
Try every center only for lettersets that score better than the best score so far.\n",
"\n",
"\n",
"\n",