Add files via upload
This commit is contained in:
parent
186a470753
commit
d795381879
@ -10,8 +10,9 @@
|
||||
"source": [
|
||||
"import re\n",
|
||||
"import itertools\n",
|
||||
"from collections import defaultdict\n",
|
||||
"from functools import lru_cache"
|
||||
"from collections import defaultdict\n",
|
||||
"from functools import lru_cache\n",
|
||||
"from math import factorial"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -20,13 +21,13 @@
|
||||
"source": [
|
||||
"# How to Count Things\n",
|
||||
"\n",
|
||||
"This notebook lists problems designed to show how to count things. Right now there are two problems.\n",
|
||||
"This notebook contains problems designed to show how to count things. Right now there are three example problems.\n",
|
||||
"\n",
|
||||
"# Student Records: Late, Absent, Present\n",
|
||||
"\n",
|
||||
"Consider this problem:\n",
|
||||
"\n",
|
||||
"> (1) Students at a school must meet with the guidance counselor if they have two absences, or three consecutive late days. Each student's attendance record consists of a string of 'A' for absent, 'L' for late, or 'P' for present. For example: \"LAPLPA\" requires a meeting (because there are two absences), and \"LAPLPL\" is OK (there are three late days, but they are not consecutive). Write a function that takes such a string as input and returns `True` if the student's record is OK. \n",
|
||||
"> (1) Students at a school must meet with the guidance counselor if they have two total absences, or three consecutive late days. Each student's attendance record consists of a string of 'A' for absent, 'L' for late, or 'P' for present. For example: \"LAPLPA\" requires a meeting (because there are two absences), and \"LAPLPL\" is OK (there are three late days, but they are not consecutive). Write a function that takes such a string as input and returns `True` if the student's record is OK. \n",
|
||||
"\n",
|
||||
"> (2) Write a function to calculate the number of attendance records of length N that are OK.\n",
|
||||
"\n",
|
||||
@ -88,14 +89,14 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def all_strings(alphabet, N): \n",
|
||||
" \"All length-N strings over the given alphabet.\"\n",
|
||||
" return map(cat, itertools.product(alphabet, repeat=N))\n",
|
||||
"\n",
|
||||
"def total_ok_slow(N: int) -> int:\n",
|
||||
" \"How many strings over 'LAP' of length N are ok?\"\n",
|
||||
" return quantify(all_strings('LAP', N), ok)\n",
|
||||
"\n",
|
||||
"def all_strings(alphabet, N): \n",
|
||||
" \"All length-N strings over the given alphabet.\"\n",
|
||||
" return map(cat, itertools.product(alphabet, repeat=N))\n",
|
||||
"\n",
|
||||
"def quantify(iterable, pred=bool) -> int:\n",
|
||||
" \"Count how many times the predicate is true of items in iterable.\"\n",
|
||||
" return sum(map(pred, iterable))\n",
|
||||
@ -146,17 +147,6 @@
|
||||
"\n",
|
||||
"* What is in the summary? A list of all ok strings is too much. A count of the number of ok strings is not enough. Instead, I will group together the strings that have the same number of `'A'` characters in them, and the same number of consecutive `'L'` characters at the end of the string, and count them. I don't need to count strings that have two or more `'A'` characters, or 3 consecutive `'L'` characters anywhere in the string. And I don't need to worry about runs of 1 or 2 `'L'` characters embedded in the middle of the string. So the summary is a mapping of the form `{(A, L): count, ...}`. \n",
|
||||
"\n",
|
||||
"* For *N* = 2, the summary looks like this:\n",
|
||||
"\n",
|
||||
" #(number_of_A_in_string, number_of_L_at_end_of_string): count\n",
|
||||
" #(A, L): c\n",
|
||||
" {(0, 0): 2, # LP, PP\n",
|
||||
" (0, 1): 1, # PL\n",
|
||||
" (0, 2): 1, # LL\n",
|
||||
" (1, 0): 3, # AP, LA, PA\n",
|
||||
" (1, 1): 1} # AL\n",
|
||||
" \n",
|
||||
"\n",
|
||||
"Here is a function to create the summary for `N+1`, given the summary for `N`:"
|
||||
]
|
||||
},
|
||||
@ -169,7 +159,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def next_summary(prev_summary: dict) -> dict:\n",
|
||||
" \"Given a summary of the form {(A, L): count, ...}, return summary for strings one char longer.\"\n",
|
||||
" \"Given a summary of the form {(A, L): count}, return summary for one char more.\"\n",
|
||||
" summary = defaultdict(int)\n",
|
||||
" for (A, L), c in prev_summary.items():\n",
|
||||
" if A < 1: summary[A+1, 0] += c # transition with 'A'\n",
|
||||
@ -186,15 +176,64 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"defaultdict(int, {(0, 0): 1, (0, 1): 1, (1, 0): 1})"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"Here's a \"bottom-up\" approach for `total_ok` that starts at `0` and works up to `N`:"
|
||||
"next_summary({(0, 0): 1})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"defaultdict(int, {(0, 0): 2, (0, 1): 1, (0, 2): 1, (1, 0): 3, (1, 1): 1})"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"next_summary(_)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"I can annotate that result with the two-letter strings that form each count:\n",
|
||||
"\n",
|
||||
" {(0, 0): 2, # LP, PP\n",
|
||||
" (0, 1): 1, # PL\n",
|
||||
" (0, 2): 1, # LL\n",
|
||||
" (1, 0): 3, # AP, LA, PA\n",
|
||||
" (1, 1): 1} # AL\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Here's an implementation of `total_ok`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
@ -215,58 +254,6 @@
|
||||
"We can use this to go way beyond what we could do with `total_ok_slow`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"CPU times: user 1.28 ms, sys: 16 µs, total: 1.29 ms\n",
|
||||
"Wall time: 1.32 ms\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"5261545087067582125179062608958232695543100705754634272071166414871321070487675367"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%time total_ok(300)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"There are over 10<sup>80</sup> ok strings of length 300; more than the number of atoms in the universe. But it only took around a millisecond to count them.\n",
|
||||
"\n",
|
||||
"Dynamic programming can also be done top-down (where we start at `N` and work down to `0`):"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def total_ok(N) -> int:\n",
|
||||
" \"How many strings of length N are ok?\"\n",
|
||||
" return sum(summary_for(N).values())\n",
|
||||
" \n",
|
||||
"def summary_for(N) -> dict: \n",
|
||||
" \"The {(A, L): count} summary for strings of length N.\"\n",
|
||||
" return ({(0, 0): 1} if N == 0 else next_summary(summary_for(N - 1)))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
@ -276,8 +263,8 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"CPU times: user 1.7 ms, sys: 78 µs, total: 1.77 ms\n",
|
||||
"Wall time: 1.81 ms\n"
|
||||
"CPU times: user 1.61 ms, sys: 8 µs, total: 1.62 ms\n",
|
||||
"Wall time: 1.63 ms\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -295,18 +282,92 @@
|
||||
"%time total_ok(300)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"1.3689147905858837e+143"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"3. ** 300"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We get the same answer in about the same amount of time.\n",
|
||||
"There are over 10<sup>80</sup> ok strings of length 300; more than the number of atoms in the universe. But it only took around a millisecond to count them (while ignoring the 3<sup>300</sup> ≈ 10<sup>143</sup> not-ok strings of length 300).\n",
|
||||
"\n",
|
||||
"Dynamic programming can also be done top-down (where we start at `N` and work down to `0`):"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def total_ok(N) -> int:\n",
|
||||
" \"How many strings of length N are ok?\"\n",
|
||||
" return sum(summary_for(N).values())\n",
|
||||
" \n",
|
||||
"def summary_for(N) -> dict: \n",
|
||||
" \"The {(A, L): count} summary for strings of length N.\"\n",
|
||||
" return ({(0, 0): 1} if N == 0 else next_summary(summary_for(N - 1)))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"CPU times: user 1.22 ms, sys: 39 µs, total: 1.26 ms\n",
|
||||
"Wall time: 1.27 ms\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"5261545087067582125179062608958232695543100705754634272071166414871321070487675367"
|
||||
]
|
||||
},
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%time total_ok(300)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We get the same answer in about the same amount of time.\n",
|
||||
"\n",
|
||||
"Let's verify our results against the slow, reliable `total_ok_slow`, and look at the summaries for the first few values of `N`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -355,7 +416,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"execution_count": 15,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
@ -383,7 +444,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -392,7 +453,7 @@
|
||||
"'ok'"
|
||||
]
|
||||
},
|
||||
"execution_count": 13,
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -430,7 +491,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -439,7 +500,7 @@
|
||||
"[1, 1, 2, 5, 15, 52, 203]"
|
||||
]
|
||||
},
|
||||
"execution_count": 14,
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -463,7 +524,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"execution_count": 18,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
@ -481,7 +542,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -490,7 +551,7 @@
|
||||
"47585391276764833658790768841387207826363669686825611466616334637559114497892442622672724044217756306953557882560751"
|
||||
]
|
||||
},
|
||||
"execution_count": 16,
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -501,7 +562,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"execution_count": 20,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
@ -512,7 +573,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"execution_count": 21,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -548,6 +609,165 @@
|
||||
"for k in itertools.chain(range(10), range(10, 121, 10)):\n",
|
||||
" print('{:3} {:12g}'.format(k, how_many(k)))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Sol Golomb’s Rectangle Puzzle\n",
|
||||
"\n",
|
||||
"This problem is covered in depth in [another notebook](Golomb-puzzle.ipynb), so here I present just the part that has to do with counting things:\n",
|
||||
"\n",
|
||||
"> *Say you’re given the following challenge: create a set of five rectangles that have sides of length 1, 2, 3, 4, 5, 6, 7, 8, 9 and 10 units. You can combine sides in a variety of ways: for example, you could create a set of rectangles with dimensions 1 x 3, 2 x 4, 5 x 7, 6 x 8 and 9 x 10. How many different sets of five rectangles are possible?*\n",
|
||||
"\n",
|
||||
"This is a basic [combinatorics](http://en.wikipedia.org/wiki/Combinatorics) or counting problem. I will present *three* methods to count the sets. If all goes well they will give the same answer. The example set of rectangles given in the problem was\n",
|
||||
"\n",
|
||||
"> {1 × 3, 2 × 4, 5 × 7, 6 × 8, 9 × 10}\n",
|
||||
" \n",
|
||||
"and in general it would be\n",
|
||||
"\n",
|
||||
"> {A × B, C × D, E × F, G × H, I × J}\n",
|
||||
"\n",
|
||||
"The question is: how many distinct ways can we assign the integers 1 through 10 to the variables A through J?\n",
|
||||
" \n",
|
||||
"**Method 1: Count all permutations and divide by repetitions:** There are 10 variables to be filled, so there are 10! = 3,628,800 permutations. But if we fill the first two variables with 1 × 3, that is the same rectangle as 3 × 1. So divide 10! by 2<sup>5</sup> to account for the fact that each of 5 rectangles can appear 2 ways. Similarly, if we fill A and B with 1 × 3, that yields the same set as if we filled C and D with 1 × 3. So divide again by 5! (the number of permutations of 5 things) to account for this.\n",
|
||||
"That gives us:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"945.0"
|
||||
]
|
||||
},
|
||||
"execution_count": 22,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"factorial(10) / 2 ** 5 / factorial(5)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"(It is always a relief when this \"count and divide\" method comes out to a whole number.)\n",
|
||||
"\n",
|
||||
"**Method 2: Count without repetitions**: in each rectangle of the example set the smaller component is listed first, and in each set, the rectangles with smaller first components are listed first. An alternative to \"count and divide\" is to count directly how many sets there are that respect this ordering. We'll work from left to right. How many choices are there for variable A? Only one: A must always be 1, because we agreed that the smallest number comes first. Then, given A, there are 9 remaining choices for B. For C, given A and B, there is again only one choice: C must be the smallest of the remaining 8 numbers (it will be 3 if the first rectangle was 1 × 2; otherwise it will be 2, but either way there is only one choice). That leaves 7 choices for D, 5 for F, 3 for H and 1 for J. So:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 23,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"945"
|
||||
]
|
||||
},
|
||||
"execution_count": 23,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"9 * 7 * 5 * 3 * 1"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"(It is always a relief when two methods give the same answer.)\n",
|
||||
" \n",
|
||||
"**Method 3: Write a program to enumerate the sets:** We'll represent the 1 × 3 rectangle as the tuple `(1, 3)` and the example set of rectangles as the set\n",
|
||||
"\n",
|
||||
" {(1, 3), (2, 4), (5, 7), (6, 8), (9, 10)}\n",
|
||||
"\n",
|
||||
"We'll write a program to generate all possible sets of rectangles, following method 2, and then just count how many there are. To implement method 2, the minimum side will always be the first element, A, in an (A, B) pair. We iterate through all possible values for B, and then join that pair with all possible rectangles made from the remaining sides. We also have to handle the case when there are no sides; then there is one possible set of rectangles: the empty set."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"945"
|
||||
]
|
||||
},
|
||||
"execution_count": 24,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"def rectangle_sets(sides):\n",
|
||||
" \"Given a set of sides, list all distinct sets of rectangles that can be made.\"\n",
|
||||
" if not sides:\n",
|
||||
" return [ set() ]\n",
|
||||
" else:\n",
|
||||
" A = min(sides)\n",
|
||||
" return [ {(A, B)} | other_rects\n",
|
||||
" for B in sides if B is not A\n",
|
||||
" for other_rects in rectangle_sets(sides - {A, B}) ]\n",
|
||||
" \n",
|
||||
"len(rectangle_sets({1, 2, 3, 4, 5, 6, 7, 8, 9, 0}))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"(It is a relief that once again we get the same answer, 945.) \n",
|
||||
"\n",
|
||||
"Here is a list of the rectangle sets with just 6 sides:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[{(1, 2), (3, 4), (5, 6)},\n",
|
||||
" {(1, 2), (3, 5), (4, 6)},\n",
|
||||
" {(1, 2), (3, 6), (4, 5)},\n",
|
||||
" {(1, 3), (2, 4), (5, 6)},\n",
|
||||
" {(1, 3), (2, 5), (4, 6)},\n",
|
||||
" {(1, 3), (2, 6), (4, 5)},\n",
|
||||
" {(1, 4), (2, 3), (5, 6)},\n",
|
||||
" {(1, 4), (2, 5), (3, 6)},\n",
|
||||
" {(1, 4), (2, 6), (3, 5)},\n",
|
||||
" {(1, 5), (2, 3), (4, 6)},\n",
|
||||
" {(1, 5), (2, 4), (3, 6)},\n",
|
||||
" {(1, 5), (2, 6), (3, 4)},\n",
|
||||
" {(1, 6), (2, 3), (4, 5)},\n",
|
||||
" {(1, 6), (2, 4), (3, 5)},\n",
|
||||
" {(1, 6), (2, 5), (3, 4)}]"
|
||||
]
|
||||
},
|
||||
"execution_count": 25,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"rectangle_sets({1, 2, 3, 4, 5, 6})"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
Loading…
Reference in New Issue
Block a user