Add files via upload

2020-07-25 00:10:33 -07:00
parent cee3abc829
commit 4a2c81fe33
1 changed files with 49 additions and 40 deletions
--- a/ipynb/equilength-numbers.ipynb
+++ b/ipynb/equilength-numbers.ipynb
@@ -27,11 +27,11 @@
    "\n",
    "# The ExpTable Data Structure\n",
    "\n",
-    "Internally, the key data structure is something I'll call a `ExpTable`: a dict where each value is an *expression*: a string such as `\"zero\"` or `\"(two plus nine)\"`; and each key is a tuple of two integers: the numeric value of the expression and the number of letters in the expression. The integers for the language defined above form this table:\n",
+    "Internally, the key data structure is something I'll call an expression table or `ExpTable`: a dict where each value is an *expression*: a string such as `\"zero\"` or `\"(two plus nine)\"`; and each key is a tuple of two integers: the numeric value of the expression and the number of letters in the expression. The integers for the mini language defined above form this table:\n",
    "\n",
    "    {(0, 4): 'zero', (1, 3): 'one', (2, 3): 'two', (3, 5): 'three', (4, 4): 'four'}\n",
    "    \n",
-    "The key `(0, 4)` means that the expression `'zero'` has the value `0` and has `4` letters. I arrange things this way because I want to eventually build up equilength expressions, ones where the key is `(i, i)` for some integer `i`, and to do so I only need a table with one entry for each `(value, letters)` combination.\n",
+    "The key `(0, 4)` means that the expression `'zero'` has the value `0` and has `4` letters. I arrange things this way because I want to eventually build up equilength expressions, ones where the key is `(i, i)` for as many integers `i` as possible, and to do so I only need one table entry for each `(value, letters)` combination.\n",
    "Let's implement that:"
   ]
  },
@@ -51,18 +51,18 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "ExpTable = dict # A mapping of {(value, number_of_letters): \"expression\"}\n",
+    "ExpTable = dict # A mapping of {(value, count_of_letters): \"expression\"}\n",
    "\n",
    "Language = namedtuple('Language', 'name, operators, integers')\n",
    "\n",
    "def language(name, operators, integers) -> Language:\n",
    "    \"\"\"E.g., language({add: 'plus, and', mul: 'times'}, 'zero, one, two')\"\"\"\n",
    "    return Language(name, {op: split(operators[op]) for op in operators},\n",
-    "                    ExpTable(make_item(i, w) for i, w in enumerate(split(integers))))\n",
+    "                    exptable(enumerate(split(integers))))\n",
    "        \n",
-    "def make_item(value, exp) -> tuple: \n",
-    "    \"\"\"A ((value, number_of_letters), exp) tuple that makes a ExpTable item.\"\"\"\n",
-    "    return (value, lettercount(exp)), exp\n",
+    "def exptable(items) -> ExpTable:\n",
+    "    \"\"\"Convert an iterable of (value, \"exp\") pairs to {(value, letter_count): \"exp\"}\"\"\"\n",
+    "    return {(val, lettercount(exp)): exp for (val, exp) in items}\n",
    "\n",
    "def lettercount(exp) -> int: return sum(ch.isalpha() for ch in exp)\n",
    "\n",
@@ -99,9 +99,9 @@
    "  - Given the table; use `equilength` to pull just the equilength expressions from the table.\n",
    "        \n",
    "    \n",
-    "Note this doesn't form all possible expressions: it gives me **branching** expressions trees but not **bushy** expression trees. Consider the two trees below; each with eight leaves (integers) and seven internal nodes (operators):\n",
-    "- **Below left**: a bushy tree that could have been formed by `c=3` iterations of combining any two table entries, <br>e.g. `(((a+b)+(c+d))+((e+f)+(g+h)))`\n",
-    "- **Below right**: a branching tree formed by `c=7` iterations of combining an integer with a table entry, <br>e.g. \n",
+    "Note this doesn't form all possible expressions: it gives me **branching** expression trees but not **bushy** expression trees. Consider the two trees below; each with eight leaves (integers) and seven internal nodes (operators):\n",
+    "- **Below left: a bushy tree** that could have been formed by `c=3` iterations of combining any two table entries, <br>e.g. `(((a+b)+(c+d))+((e+f)+(g+h)))`\n",
+    "- **Below right: a branching tree** formed by `c=7` iterations of combining an integer with a table entry, <br>e.g. \n",
    "`(h+((f+(e+(((a+b)+c)+d)))+g))`.\n",
    "\n",
    "\n",
@@ -113,7 +113,7 @@
    "                   /\\\n",
    "                  /\\\n",
    "\n",
-    "Here is the code:"
+    "Here is the code to create `expressions`:"
   ]
  },
  {
@@ -133,12 +133,12 @@
    "\n",
    "def combine(Ltable, operators, Rtable) -> ExpTable:\n",
    "    \"\"\"Return table like {(5, 12): \"(two plus three)\"} by combining table entries with ops.\"\"\"\n",
-    "    return dict(make_item(op(lv, rv), f'({Ltable[lv, ln]} {opname} {Rtable[rv, rn]})')\n",
-    "                for (rv, rn) in Rtable\n",
-    "                for op in operators\n",
-    "                if not (rv == 0 and op == div) # Don't divide by zero\n",
-    "                for (lv, ln) in Ltable\n",
-    "                for opname in operators[op])\n",
+    "    return exptable((op(lv, rv), f'({Ltable[lv, ln]} {opname} {Rtable[rv, rn]})')\n",
+    "                    for (rv, rn) in Rtable\n",
+    "                    for op in operators\n",
+    "                    if not (rv == 0 and op == div) # Don't divide by zero\n",
+    "                    for (lv, ln) in Ltable\n",
+    "                    for opname in operators[op])\n",
    "\n",
    "def equilength(table) -> dict:\n",
    "    \"\"\"Return only table expressions that evaluate to n and have n letters.\"\"\"\n",
@@ -228,7 +228,16 @@
    {
     "data": {
      "text/plain": [
-       "{4: 'four', 20: '((one plus four) times four)'}"
+       "{4: 'four',\n",
+       " 20: '((one plus four) times four)',\n",
+       " 24: '(((two and two) and two) times four)',\n",
+       " 25: '(((two and four) times four) and one)',\n",
+       " 26: '(((two and four) times four) plus two)',\n",
+       " 27: '(((one and four) and four) times three)',\n",
+       " 28: '(((zero and three) and four) times four)',\n",
+       " 29: '(((three plus four) times four) plus one)',\n",
+       " 30: '(((two times three) and four) times three)',\n",
+       " 31: '(((three plus four) times four) plus three)'}"
      ]
     },
     "execution_count": 7,
@@ -237,7 +246,7 @@
    }
   ],
   "source": [
-    "equilength(expressions(mini, 2)) # The equilength expressions after 2 iterations of `combine`"
+    "equilength(expressions(mini, 3)) # The equilength expressions after 3 iterations of `combine`"
   ]
  },
  {
@@ -979,7 +988,7 @@
    "\n",
    "My intuition is that (1) and (2) will give me \"bushier\" expressions that are unlikely to help much. They will have lots of different numerical values, but the new expressions will have roughly the same number of letters in them as the previous expressions. \n",
    "\n",
-    "Therefore, I'm going to try approach (3), which has the added advantage that I don't have to  alter `word_expessions` or `combine`. I'm going to define an `extended` language were we add to the `language.integers` field. I could just add regular integers. But the problem is that an integer like \"ninety\" has a lot fewer letters than its value; I think there would be an imbalance. Instead I'll add *pseudo-integers:* for each integer (like `\"two\"`) in the base language, the extended language will have new expressions like `(two plus two plus two...)`. The idea is that these will be better building blocks when we `combine` an integer with a table entry."
+    "Therefore, I'm going to try approach (3), which has the added advantage that I don't have to alter `word_expessions` or `combine`. I'm going to define an `extended` language where we add to the `language.integers` field. I could just add regular integers, perhaps going up to a hundred rather than just thirty. But the problem is that an integer like \"ninety\" has a lot fewer letters than its value; I think there would be an imbalance. Instead I'll add *pseudo-integers:* for each integer (like `\"two\"`) in the base language, the extended language will have new expressions like `(two plus two plus two...)`. The idea is that these will be better building blocks when we `combine` an integer with a table entry."
   ]
  },
  {
@@ -991,10 +1000,10 @@
    "def extended(language, repeats=6) -> Language:\n",
    "    \"\"\"Extend language by adding \"repeated\" integers, like \"two plus two plus two\".\"\"\"\n",
    "    name, ops, ints = language\n",
-    "    new_ints = dict(make_item(i * r, '(' + f' {op} '.join([exp] * r) + ')')\n",
-    "                for r in range(2, repeats + 1)\n",
-    "                for (i, _), exp in ints.items()\n",
-    "                for op in ops[add])\n",
+    "    new_ints = exptable((i * r, '(' + f' {op} '.join([exp] * r) + ')')\n",
+    "                        for r in range(2, repeats + 1)\n",
+    "                        for (i, _), exp in ints.items()\n",
+    "                        for op in ops[add])\n",
    "    return Language('Extended ' + name, ops, {**ints, **new_ints})"
   ]
  },
@@ -1057,7 +1066,7 @@
    "\n",
    "Come to think of it, I will alter `expressions` after all. Not to accommodate some new strategy, but rather to be more efficient. The previous version of `expressions` tries to combine each integer on either the left or the right of an existing expression in `table`. That's inefficient for two reasons:\n",
    "- On the first iteration, we're combining an integer with an integer; there's only one way to do that, so trying two ways is redundant.\n",
-    "- On subsequent iterations, given, say, the integer \"two\" and the expression \"(one plus two)\", it does make sense to combine them both ways to give us \"(two minus (one plus two))\" and \"((one plus two) minus two)\" because the respective values are -1 and 1, but it is redundant to combine them both ways with addition, because the resulting value is 5 either way. So we will restrict the second call to `combine` to only noncommutative operations."
+    "- On subsequent iterations, given, say, the integer \"two\" and the expression \"(one plus two)\", I want both \"(two minus (one plus two))\" and \"((one plus two) minus two)\" because the respective values are -1 and 1. But it is redundant to combine them both ways with addition (or multiplication): those operations are commutative, so the resulting value is 5 (or 6) either way. So we will restrict the second call to `combine` to only noncommutative operations."
   ]
  },
  {
@@ -1102,8 +1111,8 @@
     "text": [
      "Extended English has 5 equilengths: 4, 14, 21, 24, 32\n",
      "     from 488 table entries in 0 iterations\n",
-      "CPU times: user 5.15 ms, sys: 1.17 ms, total: 6.32 ms\n",
-      "Wall time: 10.9 ms\n"
+      "CPU times: user 2.72 ms, sys: 89 µs, total: 2.81 ms\n",
+      "Wall time: 2.79 ms\n"
     ]
    },
    {
@@ -1136,8 +1145,8 @@
     "text": [
      "Extended English has 150 equilengths: 4, 10-148, 150-154, 156, 158, 162-163, 168\n",
      "     from 275,310 table entries in 1 iterations\n",
-      "CPU times: user 26.3 s, sys: 138 ms, total: 26.4 s\n",
-      "Wall time: 26.9 s\n"
+      "CPU times: user 26.1 s, sys: 282 ms, total: 26.4 s\n",
+      "Wall time: 26.8 s\n"
     ]
    },
    {
@@ -1322,8 +1331,8 @@
     "text": [
      "Extended Spanish has 77 equilengths: 5, 10-12, 14-22, 24-61, 63-73, 76-83, 85, 87-88, 92-93, 96-97\n",
      "     from 77,695 table entries in 1 iterations\n",
-      "CPU times: user 1.46 s, sys: 9.26 ms, total: 1.47 s\n",
-      "Wall time: 1.48 s\n"
+      "CPU times: user 1.49 s, sys: 16.9 ms, total: 1.5 s\n",
+      "Wall time: 1.51 s\n"
     ]
    },
    {
@@ -1435,8 +1444,8 @@
     "text": [
      "Extended French has 107 equilengths: 7-12, 14-94, 96-105, 107-111, 113-115, 119-120\n",
      "     from 163,204 table entries in 1 iterations\n",
-      "CPU times: user 6.5 s, sys: 20.6 ms, total: 6.52 s\n",
-      "Wall time: 6.54 s\n"
+      "CPU times: user 6.5 s, sys: 72.1 ms, total: 6.57 s\n",
+      "Wall time: 6.62 s\n"
     ]
    },
    {
@@ -1578,8 +1587,8 @@
     "text": [
      "Extended Italian has 91 equilengths: 3, 9-13, 15-28, 30-64, 66-78, 80-87, 89-92, 94-96, 99, 104-105, 108, 114, 116, 124, 128\n",
      "     from 58,522 table entries in 1 iterations\n",
-      "CPU times: user 1.08 s, sys: 3.94 ms, total: 1.09 s\n",
-      "Wall time: 1.09 s\n"
+      "CPU times: user 1.1 s, sys: 14 ms, total: 1.12 s\n",
+      "Wall time: 1.13 s\n"
     ]
    },
    {
@@ -1707,8 +1716,8 @@
     "text": [
      "Extended Chinese has 119 equilengths: 2-3, 7-106, 108-111, 114-116, 120, 123, 125-126, 128, 133, 136-138, 142\n",
      "     from 156,099 table entries in 1 iterations\n",
-      "CPU times: user 3.97 s, sys: 7.75 ms, total: 3.98 s\n",
-      "Wall time: 3.98 s\n"
+      "CPU times: user 4.12 s, sys: 49.3 ms, total: 4.17 s\n",
+      "Wall time: 4.21 s\n"
     ]
    },
    {
@@ -1862,8 +1871,8 @@
     "text": [
      "Extended German has 139 equilengths: 4, 11-13, 15-138, 140, 143-146, 150-152, 156-157, 162\n",
      "     from 157,788 table entries in 1 iterations\n",
-      "CPU times: user 7.28 s, sys: 10.2 ms, total: 7.29 s\n",
-      "Wall time: 7.3 s\n"
+      "CPU times: user 7.31 s, sys: 79.2 ms, total: 7.39 s\n",
+      "Wall time: 7.45 s\n"
     ]
    },
    {
@@ -2027,7 +2036,7 @@
    "\n",
    "# Summary\n",
    "\n",
-    "Here are the number of equilength expressions found for each language, for `expressions` with `c=0` and `c=2`, for infinite additions, and for `extended` languages with `c=1`. English has the most on all counts, but I think that is largely because I gave it 11 operator names, while most of the other languages only have 4 or 5. German has the next most, which I believe is due to the fact that German integer names are at least one letter longer than the other languages, on average.\n",
+    "Here is a table of the number of equilength expressions found for each language, for `expressions` with `c=0` and `c=2`, for infinite additions, and for `extended` languages with `c=1`. English has the most on all counts, but I think that is largely because I gave it 11 operator names, while most of the other languages only have 4 or 5. German has the next most, which I believe is due to the fact that German integer names are longer than those of the other languages, on average.\n",
    "\n",
    "|Language|c=0|c=2|∞|extended, c=1|\n",
    "|---|--|--|--|--|\n",