Add files via upload

2025-12-20 23:30:58 -08:00
parent 69a2b1f887
commit 2df96d1cd4
2 changed files with 134 additions and 169 deletions
--- a/ipynb/Advent-2025-AI.ipynb
+++ b/ipynb/Advent-2025-AI.ipynb
@@ -175,7 +175,7 @@
    {
     "data": {
      "text/plain": [
-       "Puzzle  1.1:   .0015 seconds, answer 1182            correct"
+       "Puzzle  1.1:   .0007 seconds, answer 1182            correct"
      ]
     },
     "execution_count": 3,
@@ -305,7 +305,7 @@
    {
     "data": {
      "text/plain": [
-       "Puzzle  1.2:   .0021 seconds, answer 7509            WRONG; EXPECTED ANSWER IS 6907"
+       "Puzzle  1.2:   .0008 seconds, answer 7509            WRONG; EXPECTED ANSWER IS 6907"
      ]
     },
     "execution_count": 5,
@@ -409,7 +409,7 @@
    {
     "data": {
      "text/plain": [
-       "Puzzle  1.2:   .0018 seconds, answer 6907            correct"
+       "Puzzle  1.2:   .0008 seconds, answer 6907            correct"
      ]
     },
     "execution_count": 7,
@@ -489,7 +489,7 @@
    {
     "data": {
      "text/plain": [
-       "Puzzle  2.1:   .1342 seconds, answer 23560874270     correct"
+       "Puzzle  2.1:   .0355 seconds, answer 23560874270     correct"
      ]
     },
     "execution_count": 9,
@@ -573,7 +573,7 @@
    {
     "data": {
      "text/plain": [
-       "Puzzle  2.2:   .1546 seconds, answer 44143124633     correct"
+       "Puzzle  2.2:   .0403 seconds, answer 44143124633     correct"
      ]
     },
     "execution_count": 11,
@@ -646,7 +646,7 @@
    {
     "data": {
      "text/plain": [
-       "Puzzle  3.1:   .0043 seconds, answer 17085           correct"
+       "Puzzle  3.1:   .0019 seconds, answer 17085           correct"
      ]
     },
     "execution_count": 13,
@@ -731,7 +731,7 @@
    {
     "data": {
      "text/plain": [
-       "Puzzle  3.2:   .0080 seconds, answer 169408143086082 correct"
+       "Puzzle  3.2:   .0026 seconds, answer 169408143086082 correct"
      ]
     },
     "execution_count": 15,
@@ -828,7 +828,7 @@
    {
     "data": {
      "text/plain": [
-       "Puzzle  4.1:   .0174 seconds, answer 1569            correct"
+       "Puzzle  4.1:   .0084 seconds, answer 1569            correct"
      ]
     },
     "execution_count": 17,
@@ -923,7 +923,7 @@
    {
     "data": {
      "text/plain": [
-       "Puzzle  4.2:   .4197 seconds, answer 9280            correct"
+       "Puzzle  4.2:   .1996 seconds, answer 9280            correct"
      ]
     },
     "execution_count": 19,
@@ -1032,7 +1032,7 @@
    {
     "data": {
      "text/plain": [
-       "Puzzle  4.2:   .1065 seconds, answer 9280            correct"
+       "Puzzle  4.2:   .0329 seconds, answer 9280            correct"
      ]
     },
     "execution_count": 21,
@@ -1128,7 +1128,7 @@
    {
     "data": {
      "text/plain": [
-       "Puzzle  5.1:   .0064 seconds, answer 635             correct"
+       "Puzzle  5.1:   .0029 seconds, answer 635             correct"
      ]
     },
     "execution_count": 23,
@@ -1227,7 +1227,7 @@
    {
     "data": {
      "text/plain": [
-       "Puzzle  5.2:   .0002 seconds, answer 369761800782619 correct"
+       "Puzzle  5.2:   .0001 seconds, answer 369761800782619 correct"
      ]
     },
     "execution_count": 25,
@@ -1345,7 +1345,7 @@
    {
     "data": {
      "text/plain": [
-       "Puzzle  6.1:   .0079 seconds, answer 5877594983578   correct"
+       "Puzzle  6.1:   .0034 seconds, answer 5877594983578   correct"
      ]
     },
     "execution_count": 27,
@@ -1483,7 +1483,7 @@
    {
     "data": {
      "text/plain": [
-       "Puzzle  6.2:   .0065 seconds, answer 11159825706149  correct"
+       "Puzzle  6.2:   .0023 seconds, answer 11159825706149  correct"
      ]
     },
     "execution_count": 29,
@@ -1597,7 +1597,7 @@
    {
     "data": {
      "text/plain": [
-       "Puzzle  7.1:   .0011 seconds, answer 1681            correct"
+       "Puzzle  7.1:   .0004 seconds, answer 1681            correct"
      ]
     },
     "execution_count": 31,
@@ -1712,7 +1712,7 @@
    {
     "data": {
      "text/plain": [
-       "Puzzle  7.2:   .0019 seconds, answer 422102272495018 correct"
+       "Puzzle  7.2:   .0008 seconds, answer 422102272495018 correct"
      ]
     },
     "execution_count": 33,
@@ -1871,7 +1871,7 @@
    {
     "data": {
      "text/plain": [
-       "Puzzle  7.2:   .0025 seconds, answer 422102272495018 correct"
+       "Puzzle  7.2:   .0011 seconds, answer 422102272495018 correct"
      ]
     },
     "execution_count": 36,
@@ -2012,7 +2012,7 @@
    {
     "data": {
      "text/plain": [
-       "Puzzle  8.1:  1.3418 seconds, answer 24360           correct"
+       "Puzzle  8.1:   .2886 seconds, answer 24360           correct"
      ]
     },
     "execution_count": 38,
@@ -2151,7 +2151,7 @@
    {
     "data": {
      "text/plain": [
-       "Puzzle  8.2:  1.0359 seconds, answer 2185817796      correct"
+       "Puzzle  8.2:   .2857 seconds, answer 2185817796      correct"
      ]
     },
     "execution_count": 40,
@@ -2234,7 +2234,7 @@
    {
     "data": {
      "text/plain": [
-       "Puzzle  9.1:   .0333 seconds, answer 4772103936      correct"
+       "Puzzle  9.1:   .0094 seconds, answer 4772103936      correct"
      ]
     },
     "execution_count": 42,
@@ -2442,7 +2442,7 @@
    {
     "data": {
      "text/plain": [
-       "Puzzle  9.2:   .8590 seconds, answer 1529675217      correct"
+       "Puzzle  9.2:   .4376 seconds, answer 1529675217      correct"
      ]
     },
     "execution_count": 44,
@@ -2601,7 +2601,7 @@
    {
     "data": {
      "text/plain": [
-       "Puzzle 10.1:   .0044 seconds, answer 441             correct"
+       "Puzzle 10.1:   .0019 seconds, answer 441             correct"
      ]
     },
     "execution_count": 46,
@@ -2828,7 +2828,7 @@
    {
     "data": {
      "text/plain": [
-       "Puzzle 10.2: 11.4070 seconds, answer 18559           correct"
+       "Puzzle 10.2:  3.4554 seconds, answer 18559           correct"
      ]
     },
     "execution_count": 48,
@@ -2948,7 +2948,7 @@
    {
     "data": {
      "text/plain": [
-       "Puzzle 10.2:   .1177 seconds, answer 18559           correct"
+       "Puzzle 10.2:   .0461 seconds, answer 18559           correct"
      ]
     },
     "execution_count": 50,
@@ -3211,7 +3211,7 @@
    {
     "data": {
      "text/plain": [
-       "Puzzle 11.1:   .0007 seconds, answer 574             correct"
+       "Puzzle 11.1:   .0003 seconds, answer 574             correct"
      ]
     },
     "execution_count": 53,
@@ -3327,7 +3327,7 @@
    {
     "data": {
      "text/plain": [
-       "Puzzle 11.2:   .0016 seconds, answer 306594217920240 correct"
+       "Puzzle 11.2:   .0009 seconds, answer 306594217920240 correct"
      ]
     },
     "execution_count": 55,
@@ -3601,6 +3601,14 @@
    "    return ok"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "id": "2cbca4d7-773c-4027-8f27-270887180ee1",
+   "metadata": {},
+   "source": [
+    "*Below we see that ChatGPT's code works, but it takes about 2 minutes to run:*"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": 57,
@@ -3610,7 +3618,7 @@
    {
     "data": {
      "text/plain": [
-       "Puzzle 12.1: 211.1226 seconds, answer 454             correct"
+       "Puzzle 12.1: 112.2115 seconds, answer 454             correct"
      ]
     },
     "execution_count": 57,
@@ -3687,7 +3695,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 58,
   "id": "3b3dbe00-a191-412b-b062-edefc9ab4013",
   "metadata": {},
   "outputs": [],
@@ -3886,87 +3894,44 @@
   "id": "8aa26008-a652-4860-9c84-5ba4344d32f3",
   "metadata": {},
   "source": [
-    "# Summary of Run Times\n",
+    "# Summary\n",
    "\n",
-    "*Here are the run times and some comments.*\n",
+    "*Here are the run times, lines-of-code counts, and some comments.*\n",
    "\n",
    "*The LLM lines-of-code count is 5 times the human count. The LLM run times are roughly double the human-written run times, if we throw out 12.1, where the human noticed the trick and the LLM didn't. But all the solutions run in under a second, so run time is not a big deal.*"
   ]
  },
  {
   "cell_type": "markdown",
-   "id": "49736354-51c0-4870-bc39-f7a2e955e196",
+   "id": "a27a329a-01f5-4fbc-a3a6-5946bffe859f",
   "metadata": {},
   "source": [
-    "     DAY            LLM     Human  LLM  Human\n",
-    "       #\tLLM     Time\tTime   LOC  LOC\t\n",
-    "     ---    ------  ----    -----  ---  -----\n",
-    "     1.1\tGemini\t.001\t.001\t51\t6\tStraightforward and easy for LLM and human.\n",
-    "     1.2\tGemini\t.002\t.001\t75\t11\tBoth LLM and human erred on the distance from 0 to 0.\n",
-    "     2.1\tClaude\t.121\t.003\t29\t17\tEasy\n",
-    "     2.2\tClaude\t.135\t.004\t35\t16\tBoth LLM and human found the more efficient half-digits approach\n",
-    "     3.1\tChatGPT\t.004\t.001\t22\t11\tEasy\n",
-    "     3.2\tChatGPT\t.007\t.002\t42\t14\tEasy\n",
-    "     4.1\tGemini\t.016\t.055\t44\t9\tEasy\n",
-    "     4.2\tGemini\t.379\t.139\t52\t8\tLLM chose the less efficient scan-whole-grid approach\n",
-    "     5.1\tClaude\t.005\t.012\t45\t11\tEasy\n",
-    "     5.2\tClaude\t.001\t.001\t58\t9\tEasy\n",
-    "     6.1\tChatGPT\t.008\t.002\t67\t7\tEasy; bad “if x: True else: False” idiom by LLM\n",
-    "     6.2\tChatGPT\t.006\t.006\t87\t27\tEasy; LLM overly verbose\n",
-    "     7.1\tGemini\t.001\t.001\t63\t13\tEasy\n",
-    "     7.2\tGemini\t.002\t.002\t70\t11\tEasy\n",
-    "     8.1\tClaude\t.828\t.583\t91\t27\tEasy\n",
-    "     8.2\tClaude\t.835\t.618\t82\t11\tEasy; but LLMs Union-Find data type runs slower than mine.\n",
-    "     9.1\tChatGPT\t.027\t.037\t33\t7\tEasy\n",
-    "     9.2\tChatGPT\t.771\t.016\t157\t36\tLLM code a bit complicated; human uses “2 point” trick for speedup\n",
-    "    10.1\tGemini\t.005\t.001\t101\t18\tEasy\n",
-    "    10.2\tGemini\t.120\t.112\t70\t13\tmilp solutions similar; LLM offers other solutions\n",
-    "    11.1\tClaude\t.023\t.001\t83\t11\tEasy; LLM has a bit of vestigial code\n",
-    "    11.2\tClaude\t.001\t.001\t77\t11\tEasy\n",
-    "    12.1\tChatGPT\t3min\t.002\t238\t20\tHuman saw shortcut to avoid search; LLM wrote search functions\n",
-    "    TOTAL           3.29\t1.60   1672\t324\tOverall, Human code is 5x briefer, 2x faster (even ignoring 12.1)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 58,
-   "id": "1329c562-4721-4084-a7f2-8e8af82986c0",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Puzzle  1.1:   .0015 seconds, answer 1182            correct\n",
-      "Puzzle  1.2:   .0018 seconds, answer 6907            correct\n",
-      "Puzzle  2.1:   .1342 seconds, answer 23560874270     correct\n",
-      "Puzzle  2.2:   .1546 seconds, answer 44143124633     correct\n",
-      "Puzzle  3.1:   .0043 seconds, answer 17085           correct\n",
-      "Puzzle  3.2:   .0080 seconds, answer 169408143086082 correct\n",
-      "Puzzle  4.1:   .0174 seconds, answer 1569            correct\n",
-      "Puzzle  4.2:   .1065 seconds, answer 9280            correct\n",
-      "Puzzle  5.1:   .0064 seconds, answer 635             correct\n",
-      "Puzzle  5.2:   .0002 seconds, answer 369761800782619 correct\n",
-      "Puzzle  6.1:   .0079 seconds, answer 5877594983578   correct\n",
-      "Puzzle  6.2:   .0065 seconds, answer 11159825706149  correct\n",
-      "Puzzle  7.1:   .0011 seconds, answer 1681            correct\n",
-      "Puzzle  7.2:   .0025 seconds, answer 422102272495018 correct\n",
-      "Puzzle  8.1:  1.3418 seconds, answer 24360           correct\n",
-      "Puzzle  8.2:  1.0359 seconds, answer 2185817796      correct\n",
-      "Puzzle  9.1:   .0333 seconds, answer 4772103936      correct\n",
-      "Puzzle  9.2:   .8590 seconds, answer 1529675217      correct\n",
-      "Puzzle 10.1:   .0044 seconds, answer 441             correct\n",
-      "Puzzle 10.2:   .1177 seconds, answer 18559           correct\n",
-      "Puzzle 11.1:   .0007 seconds, answer 574             correct\n",
-      "Puzzle 11.2:   .0016 seconds, answer 306594217920240 correct\n",
-      "Puzzle 12.1: 211.1226 seconds, answer 454             correct\n",
-      "\n",
-      "Time in seconds: sum = 214.970, mean = 9.347, median =  .008, max = 211.123\n"
-     ]
-    }
-   ],
-   "source": [
-    "summary(answers) # Below is the most recent run of this notebook; the chart above is from a previous run."
+    " |  DAY | LLM<br>Name | LLM<br>Time | Human<br>Time | LLM<br>LOC | Human<br>LOC | Comments |\n",
+    " |  --- | ------ | ---- | ----- | --- | -----  | ---|\n",
+    " |  1.1 | Gemini | .0007 | .0002 | 51 | 6 | Straightforward and easy for LLM and human.  | \n",
+    " |  1.2 | Gemini | .0008 | .0004 | 75 | 11 | Both LLM and human erred on the distance from 0 to 0.  | \n",
+    " |  2.1 | Claude | .0355 | .0001 | 29 | 17 | Easy  | \n",
+    " |  2.2 | Claude | .0403 | .0002 | 35 | 16 | Both LLM and human found the more efficient half-digits approach  | \n",
+    " |  3.1 | ChatGPT | .0019 | .0003 | 22 | 11 | Easy  | \n",
+    " |  3.2 | ChatGPT | .0026 | .0008 | 42 | 14 | Easy  | \n",
+    " |  4.1 | Gemini | .0084 | .0194 | 44 | 9 | Easy  | \n",
+    " |  4.2 | Gemini | .0329 | .0495 | 52 | 8 | LLM chose the less efficient scan-whole-grid approach  | \n",
+    " |  5.1 | Claude | .0029 | .0045 | 45 | 11 | Easy  | \n",
+    " |  5.2 | Claude | .0001 | .0000 | 58 | 9 | Easy  | \n",
+    " |  6.1 | ChatGPT | .0034 | .0008 | 67 | 7 | Easy; bad “if x: True else: False” idiom by LLM  | \n",
+    " |  6.2 | ChatGPT | .0023 | .0013 | 87 | 27 | Easy; LLM overly verbose  | \n",
+    " |  7.1 | Gemini | .0004 | .0003 | 63 | 13 | Easy  | \n",
+    " |  7.2 | Gemini | .0011 | .0007 | 70 | 11 | Easy  | \n",
+    " |  8.1 | Claude | .2886 | .1981 | 91 | 27 | Easy  | \n",
+    " |  8.2 | Claude | .2857 | .2034 | 82 | 11 | Easy; but LLM's Union-Find data type runs slower than mine.  | \n",
+    " |  9.1 | ChatGPT | .0094 | .0187 | 33 | 7 | Easy  | \n",
+    " |  9.2 | ChatGPT | .4376 | .0046 | 157 | 36 | LLM code a bit complicated; human uses “2 point” trick for speedup  | \n",
+    " |  10.1 | Gemini | .0019 | .0242 | 101 | 18 | Easy  | \n",
+    " |  10.2 | Gemini | .0461 | .0680 | 70 | 13 | milp solutions similar; LLM offers other solutions  | \n",
+    " |  11.1 | Claude | .0003 | .0001 | 83 | 11 | Easy; LLM has a bit of vestigial code  | \n",
+    " |  11.2 | Claude | .0009 | .0010 | 77 | 11 | Easy  | \n",
+    " |  12.1 | ChatGPT | 112.2 | .0006 | 238 | 20 | Human saw shortcut to avoid search; LLM wrote search functions  | \n",
+    " |  **TOTAL** | |**1.204** | **.597** | **1672** | **324** | **Total time ignores 12.1. Overall, Human code is 5x briefer, 2x faster**  | "
   ]
  }
 ],
@@ -3986,7 +3951,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.13.5"
+   "version": "3.13.3"
  }
 },
 "nbformat": 4,
--- a/ipynb/Advent-2025.ipynb
+++ b/ipynb/Advent-2025.ipynb