Add files via upload

2021-09-06 00:59:49 -07:00 · 2021-09-06 00:59:49 -07:00 · d543d337bf
commit d543d337bf
parent 060846ece1
1 changed files with 121 additions and 160 deletions
--- a/ipynb/SplitStates.ipynb
+++ b/ipynb/SplitStates.ipynb
@ -13,13 +13,14 @@
    "> Given a map of the lower 48 states of the  USA, remove a subset of the states so that the map is cut into two  disjoint contiguous regions that are near-halves by area. Call the regions *A* and *B*, where *A* has the larger area. You can treat Michigan’s upper and lower peninsulas as two non-adjacent \"states,\" for a total of 49. \n",
    ">\n",
    "> To be precise, 538's question is: \n",
-    "> 1. What states should you remove to maximize the area of *B*? What is *B*'s area and percent of the country's area?\n",
    ">\n",
-    "> There is some ambiguity in the phrase \"near-halves by area\" and [Philip Bump](https://twitter.com/pbump/status/1400185939629117442) is interested in another question:\n",
+    "> **1.** What states should you remove to maximize the area of *B*? What is *B*'s area and percent of the country's area?\n",
    ">\n",
-    "> 2. What states should you remove to minimize the difference of the area of *A* and the area of *B*? \n",
+    "> There is some ambiguity in the phrase \"near-halves by area\" and [Philip Bump](https://twitter.com/pbump/status/1400185939629117442) is interested in a second question:\n",
    ">\n",
-    "> Bump hypothesized that {IL, MO, OK, NM} is the best subset to remove. Is he right?\n",
+    "> **2.** What states should you remove to minimize the difference of the area of *A* and the area of *B*? \n",
+    ">\n",
+    "> Philip Bump hypothesized that {IL, MO, OK, NM} is the best subset to remove. Is he right?\n",
    "\n",
    "# Vocabulary terms \n",
    "\n",
@ -44,33 +45,32 @@
   "outputs": [],
   "source": [
    "from typing import *\n",
+    "from collections import defaultdict\n",
    "\n",
-    "State = str\n",
-    "States = Region = frozenset # Hashable set of states\n",
-    "\n",
+    "State  = str       # Two-letter abbreviation\n",
+    "States = frozenset # Any set of states\n",
+    "Region = frozenset # A contiguous set of states\n",
    "Split  = Tuple[Region, Region, Region] # (A, B, C) = (large region, small region, cut)\n",
    "\n",
    "def states(string)   -> States: \"Set of states\";  return States(string.split())\n",
+    "def statedict(**dic) -> dict:   \"{State:States}\"; return {ST: states(dic[ST]) for ST in dic}\n",
+    "\n",
+    "neighbors = statedict( # https://theincidentaleconomist.com/wordpress/list-of-neighboring-states-with-stata-code/\n",
+    "    AK='', AL='FL GA MS TN', AR='LA MO MS OK TN TX', AZ='CA CO NM NV UT', CA='AZ NV OR', \n",
+    "    CO='AZ KS NE NM OK UT WY', CT='MA NY RI', DC='MD VA', DE='MD NJ PA', FL='AL GA', \n",
+    "    GA='AL FL NC SC TN', HI='', IA='IL MN MO NE SD WI', ID='MT NV OR UT WA WY', IL='IA IN KY MO WI', \n",
+    "    IN='IL KY LP MI OH', KS='CO MO NE OK', KY='IL IN MO OH TN VA WV', LA='AR MS TX', \n",
+    "    MA='CT NH NY RI VT', MD='DC DE PA VA WV', ME='NH', MI='IN OH WI', MN='IA ND SD WI', \n",
+    "    MO='AR IA IL KS KY NE OK TN', MS='AL AR LA TN', MT='ID ND SD WY', NC='GA SC TN VA', \n",
+    "    ND='MN MT SD', NE='CO IA KS MO SD WY', NH='MA ME VT', NJ='DE NY PA', NM='AZ CO OK TX UT', \n",
+    "    NV='AZ CA ID OR UT', NY='CT MA NJ PA VT', OH='IN KY LP MI PA WV', OK='AR CO KS MO NM TX', \n",
+    "    OR='CA ID NV WA', PA='DE MD NJ NY OH WV', RI='CT MA', SC='GA NC', SD='IA MN MT ND NE WY', \n",
+    "    TN='AL AR GA KY MO MS NC VA', TX='AR LA NM OK', UT='AZ CO ID NM NV WY', VA='DC KY MD NC TN WV', \n",
+    "    VT='MA NH NY', WA='ID OR', WI='IA IL MI MN UP', WV='KY MD OH PA VA', WY='CO ID MT NE SD UT', \n",
+    "    UP='WI', LP='IN OH')\n",
    "\n",
    "def area(states) -> int: \"Total area\"; return sum(areas[s] for s in states)\n",
    "\n",
-    "neighbors = dict( # https://theincidentaleconomist.com/wordpress/list-of-neighboring-states-with-stata-code/\n",
-    "    AK=states(''), AL=states('FL GA MS TN'), AR=states('LA MO MS OK TN TX'), AZ=states('CA CO NM NV UT'), \n",
-    "    CA=states('AZ NV OR'), CO=states('AZ KS NE NM OK UT WY'), CT=states('MA NY RI'), DC=states('MD VA'), \n",
-    "    DE=states('MD NJ PA'), FL=states('AL GA'), GA=states('AL FL NC SC TN'), HI=states(''), \n",
-    "    IA=states('IL MN MO NE SD WI'), ID=states('MT NV OR UT WA WY'), IL=states('IA IN KY MO WI'), \n",
-    "    IN=states('IL KY LP MI OH'), KS=states('CO MO NE OK'), KY=states('IL IN MO OH TN VA WV'), \n",
-    "    LA=states('AR MS TX'), MA=states('CT NH NY RI VT'), MD=states('DC DE PA VA WV'), ME=states('NH'), \n",
-    "    MI=states('IN OH WI'), MN=states('IA ND SD WI'), MO=states('AR IA IL KS KY NE OK TN'), \n",
-    "    MS=states('AL AR LA TN'), MT=states('ID ND SD WY'), NC=states('GA SC TN VA'), ND=states('MN MT SD'), \n",
-    "    NE=states('CO IA KS MO SD WY'), NH=states('MA ME VT'), NJ=states('DE NY PA'), NM=states('AZ CO OK TX UT'), \n",
-    "    NV=states('AZ CA ID OR UT'), NY=states('CT MA NJ PA VT'), OH=states('IN KY LP MI PA WV'), \n",
-    "    OK=states('AR CO KS MO NM TX'), OR=states('CA ID NV WA'), PA=states('DE MD NJ NY OH WV'), \n",
-    "    RI=states('CT MA'), SC=states('GA NC'), SD=states('IA MN MT ND NE WY'), TN=states('AL AR GA KY MO MS NC VA'), \n",
-    "    TX=states('AR LA NM OK'), UT=states('AZ CO ID NM NV WY'), VA=states('DC KY MD NC TN WV'), \n",
-    "    VT=states('MA NH NY'), WA=states('ID OR'), WI=states('IA IL MI MN UP'), WV=states('KY MD OH PA VA'), \n",
-    "    WY=states('CO ID MT NE SD UT'), UP=states('WI'), LP=states('IN OH'))\n",
-    "\n",
    "areas = dict( # https://www.census.gov/geographies/reference-files/2010/geo/state-area.html\n",
    "    AK=665384, AL=52420,  AZ=113990, AR=53179, CA=163695, CO=104094, CT=5543,  DE=2489,   DC=68, \n",
    "    FL=65758,  GA=59425,  HI=10932,  ID=83569, IL=57914,  IN=36420,  IA=56273, KS=82278,  KY=40408, \n",
@ -88,8 +88,8 @@
    "\n",
    "# \"Countries\":\n",
    "usa50 = States(areas) - states('DC UP LP')           # 50 actual US states\n",
-    "usa48 = States(areas) - states('AK HI DC UP LP')     # lower 48 states\n",
    "usa49 = States(areas) - states('AK HI DC MI')        # lower 49 \"states\": MI split into UP, LP\n",
+    "usa48 = States(areas) - states('AK HI DC UP LP')     # lower 48 states\n",
    "four  = states('UT CO AZ NM')                        # The \"four corners\" states\n",
    "western = states('WA OR CA ID NV UT AZ MT WY CO NM') # The 11 states west of the Rockies"
   ]
@ -107,17 +107,15 @@
    "\n",
    "# Making cuts\n",
    "\n",
-    "Can we generate all possible cuts? A cut is a subset of the 49 states, so there are 2<sup>49</sup> or 500 trillion possible cuts, so **no**.  \n",
+    "Is it feasible to consider all possible cuts? A cut is a subset of the 49 states, so there are 2<sup>49</sup> or 500 trillion possible cuts, so **no**, we can't look at them all.  I have four ideas to reduce the number of cuts considered:\n",
+    "- **Limit the total area in a cut.** A large area in the cut means there won't be much area left to make *B* big. \n",
+    "- **Limit the number of states in a cut.** Similarly, if there are too many states in a cut, there won't be many left for *A* or *B*.\n",
+    "- **Make cuts contiguous.** Noncontiguous cuts can't be optimal for question 1, so I won't consider them for now.\n",
+    "- **Make cuts go border-to-border.** A cut can produce exactly two regions only if (a) the cut runs from one place on the border to another place on the border or (b) the cut forms a \"donut\" that surrounds some interior region. The US map isn't big enough to support a decent-sized donut (there are only 14 non-border states, and only KS and NE are not neighbors of a border state), so I'll look only for cuts that go border-to-border.\n",
    "\n",
-    "I have four ideas to limit the number of cuts:\n",
-    "- **Make cuts small in number of states.** I'll consider subsets of up to 8 states.  That reduces the possibilites a million-fold to 500 million.\n",
-    "- **Make cuts small in area.** A large area in the cut means there won't be much area left to make region *B* big. I'll limit the cut area.\n",
-    "- **Make cuts contiguous.** Noncontiguous cuts can't be optimal for question 1, so I won't consider them.\n",
-    "- **Make cuts go border-to-border.** A cut can produce exactly two regions only if (a) the cut runs from one place on the border to another place on the border or (b) the cut forms a \"donut\" that surrounds some interior region. The US map isn't big enough to support a decent-sized donut (there are only 14 non-border states, and only two of those, KS and NE, are not neighbors of a border state). Therefore I'll look only for cuts that go border-to-border. \n",
+    "By default, the function `make_cuts` will yield all cuts that are contiguous regions up to twice the area and twice the number of states as the {IL, MO, NM, OK} cut, as long as they go from the north border of the US to the south border.\n",
    "\n",
-    "The function `make_cuts` generates  all contiguous regions of up to `maxsize` states (by default, 8)  and up to `maxarea` area (by default, twice the area of the {IL, MO, OK, NM} cut) that contain one of the `start` states (by default, the states on the north border), as well as one of the `end` states (by default, the states on the south border). \n",
-    "\n",
-    "Most of the work is done by the function `contiguous_regions`, which starts by building a set of regions where each region contains a single `start` state. Then in each iteration of the `while` loop, it yields each region from  the current set of regions, and creates a new set of regions formed by adding a neighboring state to a current region in all possible ways, as long as the area does not exceed `maxarea` and the size does not exceed `maxsize`. (On each iteration all the regions have the same size.)"
+    "The function starts by building a set of regions where each region contains a single `start` state. Then in each iteration of the `while` loop, it yields each region in the current set that intersects the `end` states, and creates a new set of regions formed by adding a neighboring state to a current region in all possible ways, as long as the area does not exceed `maxarea` and the size does not exceed `maxsize`. (On each iteration all the regions have the same size.)"
   ]
  },
  {
@ -126,29 +124,24 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "maxarea = 2 * area(states('IL MO OK NM'))\n",
+    "maxarea = 2 * area(states('IL MO NM OK'))\n",
    "\n",
    "def make_cuts(country, maxsize=8, maxarea=maxarea, start=north, end=south) -> Iterator[Region]:\n",
-    "    \"\"\"All regions up to `maxsize` and up to `maxarea` that contain a `start` and `end` state.\"\"\"\n",
-    "    return filter(end.intersection, contiguous_regions(country, maxsize, maxarea, start))\n",
-    "        \n",
-    "def contiguous_regions(country, maxsize, maxarea, start) -> Iterator[Region]:\n",
-    "    \"\"\"Contiguous regions up to `maxsize` and `maxarea` that contain one of `start`.\"\"\"\n",
+    "    \"\"\"All contiguous regions up to `maxsize` and `maxarea` that contain a `start` and `end` state.\"\"\"\n",
    "    regions = {Region({s}) for s in start & country} \n",
    "    while regions:\n",
-    "        yield from regions \n",
+    "        yield from filter(end.intersection, regions) \n",
    "        regions = {region | {s1}\n",
    "                   for region in regions if len(region) + 1 <= maxsize \n",
-    "                   for s in region for s1 in neighbors[s] \n",
-    "                   if s1 in country and s1 not in region\n",
-    "                   and area(region) + areas[s1] <= maxarea} "
+    "                   for s in region for s1 in (neighbors[s] & country) - region\n",
+    "                   if area(region) + areas[s1] <= maxarea} "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "For example, the north-south cuts of size up to 3 through the western states:"
+    "For example, the north-south cuts of size up to 3 states:"
   ]
  },
  {
@ -159,12 +152,12 @@
    {
     "data": {
      "text/plain": [
-       "{frozenset({'ID', 'NM', 'UT'}),\n",
-       " frozenset({'CA', 'ID', 'NV'}),\n",
+       "{frozenset({'AZ', 'ID', 'UT'}),\n",
+       " frozenset({'ID', 'NM', 'UT'}),\n",
       " frozenset({'CA', 'ID', 'OR'}),\n",
+       " frozenset({'CA', 'ID', 'NV'}),\n",
       " frozenset({'AZ', 'ID', 'NV'}),\n",
-       " frozenset({'CA', 'OR', 'WA'}),\n",
-       " frozenset({'AZ', 'ID', 'UT'})}"
+       " frozenset({'CA', 'OR', 'WA'})}"
      ]
     },
     "execution_count": 3,
@ -173,7 +166,7 @@
    }
   ],
   "source": [
-    "set(make_cuts(western, 3))"
+    "set(make_cuts(usa49, 3))"
   ]
  },
  {
@ -199,7 +192,8 @@
    "        noncut = country - C\n",
    "        A = floodfill(noncut) \n",
    "        B = floodfill(noncut - A)\n",
-    "        if A and B and A | B == noncut:\n",
+    "        if A and B and (A | B | C == country):\n",
+    "            if area(B) > area(A): A, B = B, A # Ensure A larger than B\n",
    "            B, A = sorted([A, B], key=area) \n",
    "            yield (A, B, C)\n",
    "            \n",
@ -286,10 +280,10 @@
    "def show(title, country, split):\n",
    "    \"\"\"Print a title, and a summary of the split in four rows. The columns shown are:\n",
    "    'region name|area|percent of country area|number of states in region|states in region'.\n",
-    "    The ∆ row is not a region; it is the difference in area between A and B.\"\"\"\n",
+    "    The ∆ row of the table is not a region; it is the difference in area between A and B.\"\"\"\n",
    "    A, B, C = split\n",
    "    def print_row(name, region, sqmi): \n",
-    "        statelist = f'{len(region):2d}|{\" \".join(sorted(region))}' if region else ''\n",
+    "        statelist = f'{len(region):2d}|{{{\",\".join(sorted(region))}}}' if region else ''\n",
    "        print(f'{name}|{sqmi:9,d}|{sqmi/area(country):5.1%}|{statelist}')\n",
    "    print(f'\\n{title}:')\n",
    "    print_row('A', A,  area(A))\n",
@ -310,15 +304,15 @@
      "49 states ⇒ 43,901 cuts (maxsize ≤ 8, area ≤ 638,220) ⇒ 14,149 splits.\n",
      "\n",
      "1. Split that maximizes area(B):\n",
-      "A|1,345,558|43.1%|29|AL AR CT DE FL GA IN KS KY LA LP MA MD ME MS NC NH NJ NY OH OK PA RI SC TN TX VA VT WV\n",
-      "B|1,344,149|43.1%|15|AZ CA IA ID MN MT ND NV OR SD UP UT WA WI WY\n",
-      "C|  430,653|13.8%| 5|CO IL MO NE NM\n",
+      "A|1,345,558|43.1%|29|{AL,AR,CT,DE,FL,GA,IN,KS,KY,LA,LP,MA,MD,ME,MS,NC,NH,NJ,NY,OH,OK,PA,RI,SC,TN,TX,VA,VT,WV}\n",
+      "B|1,344,149|43.1%|15|{AZ,CA,IA,ID,MN,MT,ND,NV,OR,SD,UP,UT,WA,WI,WY}\n",
+      "C|  430,653|13.8%| 5|{CO,IL,MO,NE,NM}\n",
      "∆|    1,409| 0.0%|\n",
      "\n",
      "2. Split that minimizes ∆ = area(A) - area(B):\n",
-      "A|1,267,033|40.6%|14|AZ CA IA ID MN MT ND NV OR UP UT WA WI WY\n",
-      "B|1,266,994|40.6%|27|AL AR CT DE FL GA KS KY LA LP MA MD ME MS NC NH NJ NY OH OK PA RI SC TX VA VT WV\n",
-      "C|  586,333|18.8%| 8|CO IL IN MO NE NM SD TN\n",
+      "A|1,267,033|40.6%|14|{AZ,CA,IA,ID,MN,MT,ND,NV,OR,UP,UT,WA,WI,WY}\n",
+      "B|1,266,994|40.6%|27|{AL,AR,CT,DE,FL,GA,KS,KY,LA,LP,MA,MD,ME,MS,NC,NH,NJ,NY,OH,OK,PA,RI,SC,TX,VA,VT,WV}\n",
+      "C|  586,333|18.8%| 8|{CO,IL,IN,MO,NE,NM,SD,TN}\n",
      "∆|       39| 0.0%|\n"
     ]
    }
@ -327,23 +321,6 @@
    "answers(usa49)"
   ]
  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Here are maps of the two answer splits (courtesy [electoralvotemap.com](https://electoralvotemap.com/)):\n",
-    "\n",
-    "1. The cut {CO, IL, MO, NE, NM} gives a *B* (to the west) with area 1,344,149, or 43.1% of the lower 48: \n",
-    "\n",
-    "![map3.png](map3.png)\n",
-    "\n",
-    "2. The cut {CO, IL, IN, MO, NE, NM, SD, TN} gives two regions that differ in area by only 39 square miles. You can think of this as starting from the cut in (1) and then cutting SD from the western region and TN and IN from the eastern region, to even out the areas:\n",
-    "\n",
-    "![map4.png](map4.png)\n",
-    "\n",
-    "Both these results are better than the 4-state cut {IL, MO, OK, NM}. "
-   ]
-  },
  {
   "cell_type": "markdown",
   "metadata": {},
@ -365,15 +342,15 @@
      "50 states ⇒ 45,810 cuts (maxsize ≤ 8, area ≤ 638,220) ⇒ 14,137 splits.\n",
      "\n",
      "1. Split that maximizes area(B):\n",
-      "A|1,345,626|43.1%|30|AL AR CT DC DE FL GA IN KS KY LA LP MA MD ME MS NC NH NJ NY OH OK PA RI SC TN TX VA VT WV\n",
-      "B|1,344,149|43.1%|15|AZ CA IA ID MN MT ND NV OR SD UP UT WA WI WY\n",
-      "C|  430,653|13.8%| 5|CO IL MO NE NM\n",
+      "A|1,345,626|43.1%|30|{AL,AR,CT,DC,DE,FL,GA,IN,KS,KY,LA,LP,MA,MD,ME,MS,NC,NH,NJ,NY,OH,OK,PA,RI,SC,TN,TX,VA,VT,WV}\n",
+      "B|1,344,149|43.1%|15|{AZ,CA,IA,ID,MN,MT,ND,NV,OR,SD,UP,UT,WA,WI,WY}\n",
+      "C|  430,653|13.8%| 5|{CO,IL,MO,NE,NM}\n",
      "∆|    1,477| 0.0%|\n",
      "\n",
      "2. Split that minimizes ∆ = area(A) - area(B):\n",
-      "A|1,267,062|40.6%|28|AL AR CT DC DE FL GA KS KY LA LP MA MD ME MS NC NH NJ NY OH OK PA RI SC TX VA VT WV\n",
-      "B|1,267,033|40.6%|14|AZ CA IA ID MN MT ND NV OR UP UT WA WI WY\n",
-      "C|  586,333|18.8%| 8|CO IL IN MO NE NM SD TN\n",
+      "A|1,267,062|40.6%|28|{AL,AR,CT,DC,DE,FL,GA,KS,KY,LA,LP,MA,MD,ME,MS,NC,NH,NJ,NY,OH,OK,PA,RI,SC,TX,VA,VT,WV}\n",
+      "B|1,267,033|40.6%|14|{AZ,CA,IA,ID,MN,MT,ND,NV,OR,UP,UT,WA,WI,WY}\n",
+      "C|  586,333|18.8%| 8|{CO,IL,IN,MO,NE,NM,SD,TN}\n",
      "∆|       29| 0.0%|\n"
     ]
    }
@ -410,15 +387,15 @@
      "48 states ⇒ 43,941 cuts (maxsize ≤ 8, area ≤ 638,220) ⇒ 19,811 splits.\n",
      "\n",
      "1. Split that maximizes area(B):\n",
-      "A|1,348,646|43.2%|30|AL CT DE FL GA IA IL IN KY MA MD ME MI MN MT NC ND NH NJ NY OH PA RI SC SD TN VA VT WI WV\n",
-      "B|1,341,666|43.0%|12|AZ CA CO KS LA NM NV OK OR TX UT WA\n",
-      "C|  430,048|13.8%| 6|AR ID MO MS NE WY\n",
+      "A|1,348,646|43.2%|30|{AL,CT,DE,FL,GA,IA,IL,IN,KY,MA,MD,ME,MI,MN,MT,NC,ND,NH,NJ,NY,OH,PA,RI,SC,SD,TN,VA,VT,WI,WV}\n",
+      "B|1,341,666|43.0%|12|{AZ,CA,CO,KS,LA,NM,NV,OK,OR,TX,UT,WA}\n",
+      "C|  430,048|13.8%| 6|{AR,ID,MO,MS,NE,WY}\n",
      "∆|    6,980| 0.2%|\n",
      "\n",
      "2. Split that minimizes ∆ = area(A) - area(B):\n",
-      "A|1,267,816|40.6%|13|AZ CA ID KS MN MT ND NE NV OR SD UT WA\n",
-      "B|1,267,672|40.6%|28|AL AR CT DE FL GA IL IN KY LA MA MD ME MI MS NC NH NJ NY OH PA RI SC TN TX VA VT WV\n",
-      "C|  584,872|18.7%| 7|CO IA MO NM OK WI WY\n",
+      "A|1,267,816|40.6%|13|{AZ,CA,ID,KS,MN,MT,ND,NE,NV,OR,SD,UT,WA}\n",
+      "B|1,267,672|40.6%|28|{AL,AR,CT,DE,FL,GA,IL,IN,KY,LA,MA,MD,ME,MI,MS,NC,NH,NJ,NY,OH,PA,RI,SC,TN,TX,VA,VT,WV}\n",
+      "C|  584,872|18.7%| 7|{CO,IA,MO,NM,OK,WI,WY}\n",
      "∆|      144| 0.0%|\n"
     ]
    }
@ -439,15 +416,15 @@
      "49 states ⇒ 45,856 cuts (maxsize ≤ 8, area ≤ 638,220) ⇒ 19,860 splits.\n",
      "\n",
      "1. Split that maximizes area(B):\n",
-      "A|1,348,714|43.2%|31|AL CT DC DE FL GA IA IL IN KY MA MD ME MI MN MT NC ND NH NJ NY OH PA RI SC SD TN VA VT WI WV\n",
-      "B|1,341,666|43.0%|12|AZ CA CO KS LA NM NV OK OR TX UT WA\n",
-      "C|  430,048|13.8%| 6|AR ID MO MS NE WY\n",
+      "A|1,348,714|43.2%|31|{AL,CT,DC,DE,FL,GA,IA,IL,IN,KY,MA,MD,ME,MI,MN,MT,NC,ND,NH,NJ,NY,OH,PA,RI,SC,SD,TN,VA,VT,WI,WV}\n",
+      "B|1,341,666|43.0%|12|{AZ,CA,CO,KS,LA,NM,NV,OK,OR,TX,UT,WA}\n",
+      "C|  430,048|13.8%| 6|{AR,ID,MO,MS,NE,WY}\n",
      "∆|    7,048| 0.2%|\n",
      "\n",
      "2. Split that minimizes ∆ = area(A) - area(B):\n",
-      "A|1,267,816|40.6%|13|AZ CA ID KS MN MT ND NE NV OR SD UT WA\n",
-      "B|1,267,740|40.6%|29|AL AR CT DC DE FL GA IL IN KY LA MA MD ME MI MS NC NH NJ NY OH PA RI SC TN TX VA VT WV\n",
-      "C|  584,872|18.7%| 7|CO IA MO NM OK WI WY\n",
+      "A|1,267,816|40.6%|13|{AZ,CA,ID,KS,MN,MT,ND,NE,NV,OR,SD,UT,WA}\n",
+      "B|1,267,740|40.6%|29|{AL,AR,CT,DC,DE,FL,GA,IL,IN,KY,LA,MA,MD,ME,MI,MS,NC,NH,NJ,NY,OH,PA,RI,SC,TN,TX,VA,VT,WV}\n",
+      "C|  584,872|18.7%| 7|{CO,IA,MO,NM,OK,WI,WY}\n",
      "∆|       76| 0.0%|\n"
     ]
    }
@ -469,7 +446,7 @@
   "source": [
    "# Four-state cuts\n",
    "\n",
-    "If we are restricted to four-state cuts, the proposed {IL, MO, OK, NM} cut is indeed best:"
+    "If we are restricted to four-state cuts, the proposed {IL, MO, NM, OK} cut is indeed best:"
   ]
  },
  {
@ -484,15 +461,15 @@
      "49 states ⇒ 61 cuts (maxsize ≤ 4, area ≤ 638,220) ⇒ 45 splits.\n",
      "\n",
      "1. Split that maximizes area(B):\n",
-      "A|1,607,869|51.5%|18|AZ CA CO IA ID KS MN MT ND NE NV OR SD UP UT WA WI WY\n",
-      "B|1,193,381|38.2%|27|AL AR CT DE FL GA IN KY LA LP MA MD ME MS NC NH NJ NY OH PA RI SC TN TX VA VT WV\n",
-      "C|  319,110|10.2%| 4|IL MO NM OK\n",
+      "A|1,607,869|51.5%|18|{AZ,CA,CO,IA,ID,KS,MN,MT,ND,NE,NV,OR,SD,UP,UT,WA,WI,WY}\n",
+      "B|1,193,381|38.2%|27|{AL,AR,CT,DE,FL,GA,IN,KY,LA,LP,MA,MD,ME,MS,NC,NH,NJ,NY,OH,PA,RI,SC,TN,TX,VA,VT,WV}\n",
+      "C|  319,110|10.2%| 4|{IL,MO,NM,OK}\n",
      "∆|  414,488|13.3%|\n",
      "\n",
      "2. Split that minimizes ∆ = area(A) - area(B):\n",
-      "A|1,607,869|51.5%|18|AZ CA CO IA ID KS MN MT ND NE NV OR SD UP UT WA WI WY\n",
-      "B|1,193,381|38.2%|27|AL AR CT DE FL GA IN KY LA LP MA MD ME MS NC NH NJ NY OH PA RI SC TN TX VA VT WV\n",
-      "C|  319,110|10.2%| 4|IL MO NM OK\n",
+      "A|1,607,869|51.5%|18|{AZ,CA,CO,IA,ID,KS,MN,MT,ND,NE,NV,OR,SD,UP,UT,WA,WI,WY}\n",
+      "B|1,193,381|38.2%|27|{AL,AR,CT,DE,FL,GA,IN,KY,LA,LP,MA,MD,ME,MS,NC,NH,NJ,NY,OH,PA,RI,SC,TN,TX,VA,VT,WV}\n",
+      "C|  319,110|10.2%| 4|{IL,MO,NM,OK}\n",
      "∆|  414,488|13.3%|\n"
     ]
    }
@ -507,7 +484,9 @@
   "source": [
    "# Achieving equality on question 2\n",
    "\n",
-    "Can we find regions with *exactly* equal areas? The function `make_equals` generates contiguous regions (up to maxsize 10), keeping track of the areas, and when it finds a second disjoint region with the same area, it yields the two regions with their area:"
+    "Can we find regions with *exactly* equal areas (to the nearest square mile)? The function `make_equals` generates contiguous regions (up to maxsize 10 by default), keeping track of the areas, and when it finds a second disjoint region with the same area, it yields the two regions with their area. \n",
+    "\n",
+    "With `make_cuts` and `make_splits` we generated a contiguous cut first, then checked that the cut formed two valid regions. Now with `make_equals` we generate two equal-area regions first, then check that they are separated by a not-necessarily-contiguous cut."
   ]
  },
  {
@ -518,65 +497,48 @@
   "source": [
    "def make_equals(country, maxsize=10) -> Iterator[Tuple[int, Region, Region]]:\n",
    "    \"\"\"Yield (area, A, B) for disjoint regions A, B up to `maxsize` with exactly equal area.\"\"\"\n",
-    "    table = {} # {area: region_with_that_area}\n",
-    "    for region in contiguous_regions(country, maxsize, area(country) / 2, country):\n",
-    "        a = area(region)\n",
-    "        if a in table and separated(region, table[a]):\n",
-    "            yield (a, region, table[a])\n",
-    "        table[a] = region\n",
+    "    table = defaultdict(set) # {area: {regions_with_that_area...}}\n",
+    "    for A in make_cuts(country, maxsize, area(country) / 2, country, country):\n",
+    "        a = area(A)\n",
+    "        for B in table[a]:\n",
+    "            if separated(A, B):\n",
+    "                yield (a, A, B)\n",
+    "        table[a].add(A)\n",
    "    \n",
    "def separated(A, B) -> bool: \n",
    "    \"\"\"Are regions A and B disjoint with no shared border?\"\"\"\n",
    "    return A.isdisjoint(B) and all(neighbors[a].isdisjoint(B) for a in A)"
   ]
  },
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "543"
-      ]
-     },
-     "execution_count": 13,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "equals = list(make_equals(usa49, 10))\n",
-    "len(equals)"
-   ]
-  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "From `equals` we can find the two equi-area regions with largest area:"
+    "This is the first computation that will take more than a couple of seconds:"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
+      "CPU times: user 47.6 s, sys: 525 ms, total: 48.2 s\n",
+      "Wall time: 48.2 s\n",
      "\n",
      "Split with ∆ = 0:\n",
-      "A|  874,595|28.0%|10|AL FL GA KS LA MS NC NM OK TX\n",
-      "B|  874,595|28.0%|10|CA IA ID IL IN MT ND NV SD WA\n",
-      "C|1,371,170|43.9%|29|AR AZ CO CT DE KY LP MA MD ME MN MO NE NH NJ NY OH OR PA RI SC TN UP UT VA VT WI WV WY\n",
+      "A|  874,595|28.0%|10|{AL,FL,GA,KS,LA,MS,NC,NM,OK,TX}\n",
+      "B|  874,595|28.0%|10|{CA,IA,ID,IL,IN,MT,ND,NV,SD,WA}\n",
+      "C|1,371,170|43.9%|29|{AR,AZ,CO,CT,DE,KY,LP,MA,MD,ME,MN,MO,NE,NH,NJ,NY,OH,OR,PA,RI,SC,TN,UP,UT,VA,VT,WI,WV,WY}\n",
      "∆|        0| 0.0%|\n"
     ]
    }
   ],
   "source": [
+    "%time equals = list(make_equals(usa49, 10))\n",
    "(a, A, B) = max(equals)\n",
    "show('Split with ∆ = 0', usa49, (A, B, usa49 - A - B))"
   ]
@ -585,23 +547,24 @@
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "There are probably larger regions with equal area, but it would take longer to search for them."
+    "There may be larger regions with equal area. I searched up to `maxsize=12` and didn't find anything.\n",
+    "<img src=\"map6.png\" width=690>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "# Achieving optimality\n",
+    "A difference in area of ∆ = 0 is obviously an optimal answer to question 2. How about question 1? \n",
    "\n",
-    "A difference in area of ∆ = 0 is obviously an optimal answer to question 2.\n",
+    "# Proving optimality on question 1\n",
    "\n",
-    "How about question 1? We arbitrarily limited cuts to 8 states, going from the north to south border. To prove that we have the best cut, we'll have to eliminate those constraints, allowing cuts of any number of states going between any border states. This will increase run time, probably by an order of magnitude. Fortunately, we can tighten the area constraint tobring that down a bit. We found that the cut {CO, IL, MO, NE, NM} produces a region *B* with area 1,344,149, so that means that any cut that is better for question 1 must create a split where the areas of both *A* and *B* are greater than 1,344,149, Therefore, we can lower `maxarea` from 638,220 to:"
+    "We arbitrarily limited cuts to 8 states, going from the north to south border. To prove that we have the best cut, we'll have to eliminate those constraints, allowing cuts of any number of states going between any border states. This will increase run time, probably by an order of magnitude. Fortunately, we can tighten the area constraint a bit. We found that the cut {CO, IL, MO, NE, NM} produces a region *B* with area 1,344,149, so that means that any cut that is better for question 1 must create a split where the areas of both *A* and *B* are greater than 1,344,149. Therefore, we can lower the `maxarea` for the cut to:"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
@ -610,7 +573,7 @@
       "432062"
      ]
     },
-     "execution_count": 15,
+     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
@ -621,7 +584,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
@ -631,30 +594,28 @@
      "49 states ⇒ 547,779 cuts (maxsize ≤ 49, area ≤ 432,062) ⇒ 42,685 splits.\n",
      "\n",
      "1. Split that maximizes area(B):\n",
-      "A|1,345,558|43.1%|29|AL AR CT DE FL GA IN KS KY LA LP MA MD ME MS NC NH NJ NY OH OK PA RI SC TN TX VA VT WV\n",
-      "B|1,344,149|43.1%|15|AZ CA IA ID MN MT ND NV OR SD UP UT WA WI WY\n",
-      "C|  430,653|13.8%| 5|CO IL MO NE NM\n",
+      "A|1,345,558|43.1%|29|{AL,AR,CT,DE,FL,GA,IN,KS,KY,LA,LP,MA,MD,ME,MS,NC,NH,NJ,NY,OH,OK,PA,RI,SC,TN,TX,VA,VT,WV}\n",
+      "B|1,344,149|43.1%|15|{AZ,CA,IA,ID,MN,MT,ND,NV,OR,SD,UP,UT,WA,WI,WY}\n",
+      "C|  430,653|13.8%| 5|{CO,IL,MO,NE,NM}\n",
      "∆|    1,409| 0.0%|\n",
      "\n",
      "2. Split that minimizes ∆ = area(A) - area(B):\n",
-      "A|1,345,558|43.1%|29|AL AR CT DE FL GA IN KS KY LA LP MA MD ME MS NC NH NJ NY OH OK PA RI SC TN TX VA VT WV\n",
-      "B|1,344,149|43.1%|15|AZ CA IA ID MN MT ND NV OR SD UP UT WA WI WY\n",
-      "C|  430,653|13.8%| 5|CO IL MO NE NM\n",
+      "A|1,345,558|43.1%|29|{AL,AR,CT,DE,FL,GA,IN,KS,KY,LA,LP,MA,MD,ME,MS,NC,NH,NJ,NY,OH,OK,PA,RI,SC,TN,TX,VA,VT,WV}\n",
+      "B|1,344,149|43.1%|15|{AZ,CA,IA,ID,MN,MT,ND,NV,OR,SD,UP,UT,WA,WI,WY}\n",
+      "C|  430,653|13.8%| 5|{CO,IL,MO,NE,NM}\n",
      "∆|    1,409| 0.0%|\n"
     ]
    }
   ],
   "source": [
-    "answers(usa49, maxsize=49, maxarea=432062, start=border, end=border)"
+    "answers(usa49, maxsize=49, maxarea=area(usa49) - 2 * 1344149, start=border, end=border)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "This confirms that {CO, IL, MO, NE, NM} is indeed the optimal cut for question 1.\n",
-    "\n",
-    "(*Note:* the `answers` computation above took about 40 seconds, and the  `make_equals` computation before that took about 50 seconds. All the other cells in this notebook take under 5 seconds. You can use the IPython `%%time` magic directive if you want to see for yourself, or if you want to try to modify the functions to achieve better efficiency.)"
+    "This confirms that {CO, IL, MO, NE, NM} is indeed the optimal cut for question 1."
   ]
  },
  {
@ -668,7 +629,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
@ -677,7 +638,7 @@
       "'ok'"
      ]
     },
-     "execution_count": 17,
+     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
@ -710,7 +671,7 @@
    "        (states('AZ CO MT NM UT WY'), states('OR WA'), states('CA ID NV')),\n",
    "        (states('AZ CO MT NM NV UT WY'), states('WA'), states('CA ID OR'))}\n",
    "\n",
-    "    assert set(contiguous_regions(four, 4, maxarea, four)) == {\n",
+    "    assert set(make_cuts(four, 4, maxarea, four, four)) == {\n",
    "        states('UT'), states('CO'), states('AZ'), states('NM'),\n",
    "        states('AZ CO'), states('AZ NM'), states('CO NM'), states('NM UT'), states('AZ UT'), states('CO UT'),\n",
    "        states('AZ CO UT'), states('AZ CO NM'), states('CO NM UT'), states('AZ NM UT'),\n",