Add files via upload

This commit is contained in:
Peter Norvig 2025-12-20 23:30:58 -08:00 committed by GitHub
parent 69a2b1f887
commit 2df96d1cd4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 134 additions and 169 deletions

View File

@ -175,7 +175,7 @@
{
"data": {
"text/plain": [
"Puzzle 1.1: .0015 seconds, answer 1182 correct"
"Puzzle 1.1: .0007 seconds, answer 1182 correct"
]
},
"execution_count": 3,
@ -305,7 +305,7 @@
{
"data": {
"text/plain": [
"Puzzle 1.2: .0021 seconds, answer 7509 WRONG; EXPECTED ANSWER IS 6907"
"Puzzle 1.2: .0008 seconds, answer 7509 WRONG; EXPECTED ANSWER IS 6907"
]
},
"execution_count": 5,
@ -409,7 +409,7 @@
{
"data": {
"text/plain": [
"Puzzle 1.2: .0018 seconds, answer 6907 correct"
"Puzzle 1.2: .0008 seconds, answer 6907 correct"
]
},
"execution_count": 7,
@ -489,7 +489,7 @@
{
"data": {
"text/plain": [
"Puzzle 2.1: .1342 seconds, answer 23560874270 correct"
"Puzzle 2.1: .0355 seconds, answer 23560874270 correct"
]
},
"execution_count": 9,
@ -573,7 +573,7 @@
{
"data": {
"text/plain": [
"Puzzle 2.2: .1546 seconds, answer 44143124633 correct"
"Puzzle 2.2: .0403 seconds, answer 44143124633 correct"
]
},
"execution_count": 11,
@ -646,7 +646,7 @@
{
"data": {
"text/plain": [
"Puzzle 3.1: .0043 seconds, answer 17085 correct"
"Puzzle 3.1: .0019 seconds, answer 17085 correct"
]
},
"execution_count": 13,
@ -731,7 +731,7 @@
{
"data": {
"text/plain": [
"Puzzle 3.2: .0080 seconds, answer 169408143086082 correct"
"Puzzle 3.2: .0026 seconds, answer 169408143086082 correct"
]
},
"execution_count": 15,
@ -828,7 +828,7 @@
{
"data": {
"text/plain": [
"Puzzle 4.1: .0174 seconds, answer 1569 correct"
"Puzzle 4.1: .0084 seconds, answer 1569 correct"
]
},
"execution_count": 17,
@ -923,7 +923,7 @@
{
"data": {
"text/plain": [
"Puzzle 4.2: .4197 seconds, answer 9280 correct"
"Puzzle 4.2: .1996 seconds, answer 9280 correct"
]
},
"execution_count": 19,
@ -1032,7 +1032,7 @@
{
"data": {
"text/plain": [
"Puzzle 4.2: .1065 seconds, answer 9280 correct"
"Puzzle 4.2: .0329 seconds, answer 9280 correct"
]
},
"execution_count": 21,
@ -1128,7 +1128,7 @@
{
"data": {
"text/plain": [
"Puzzle 5.1: .0064 seconds, answer 635 correct"
"Puzzle 5.1: .0029 seconds, answer 635 correct"
]
},
"execution_count": 23,
@ -1227,7 +1227,7 @@
{
"data": {
"text/plain": [
"Puzzle 5.2: .0002 seconds, answer 369761800782619 correct"
"Puzzle 5.2: .0001 seconds, answer 369761800782619 correct"
]
},
"execution_count": 25,
@ -1345,7 +1345,7 @@
{
"data": {
"text/plain": [
"Puzzle 6.1: .0079 seconds, answer 5877594983578 correct"
"Puzzle 6.1: .0034 seconds, answer 5877594983578 correct"
]
},
"execution_count": 27,
@ -1483,7 +1483,7 @@
{
"data": {
"text/plain": [
"Puzzle 6.2: .0065 seconds, answer 11159825706149 correct"
"Puzzle 6.2: .0023 seconds, answer 11159825706149 correct"
]
},
"execution_count": 29,
@ -1597,7 +1597,7 @@
{
"data": {
"text/plain": [
"Puzzle 7.1: .0011 seconds, answer 1681 correct"
"Puzzle 7.1: .0004 seconds, answer 1681 correct"
]
},
"execution_count": 31,
@ -1712,7 +1712,7 @@
{
"data": {
"text/plain": [
"Puzzle 7.2: .0019 seconds, answer 422102272495018 correct"
"Puzzle 7.2: .0008 seconds, answer 422102272495018 correct"
]
},
"execution_count": 33,
@ -1871,7 +1871,7 @@
{
"data": {
"text/plain": [
"Puzzle 7.2: .0025 seconds, answer 422102272495018 correct"
"Puzzle 7.2: .0011 seconds, answer 422102272495018 correct"
]
},
"execution_count": 36,
@ -2012,7 +2012,7 @@
{
"data": {
"text/plain": [
"Puzzle 8.1: 1.3418 seconds, answer 24360 correct"
"Puzzle 8.1: .2886 seconds, answer 24360 correct"
]
},
"execution_count": 38,
@ -2151,7 +2151,7 @@
{
"data": {
"text/plain": [
"Puzzle 8.2: 1.0359 seconds, answer 2185817796 correct"
"Puzzle 8.2: .2857 seconds, answer 2185817796 correct"
]
},
"execution_count": 40,
@ -2234,7 +2234,7 @@
{
"data": {
"text/plain": [
"Puzzle 9.1: .0333 seconds, answer 4772103936 correct"
"Puzzle 9.1: .0094 seconds, answer 4772103936 correct"
]
},
"execution_count": 42,
@ -2442,7 +2442,7 @@
{
"data": {
"text/plain": [
"Puzzle 9.2: .8590 seconds, answer 1529675217 correct"
"Puzzle 9.2: .4376 seconds, answer 1529675217 correct"
]
},
"execution_count": 44,
@ -2601,7 +2601,7 @@
{
"data": {
"text/plain": [
"Puzzle 10.1: .0044 seconds, answer 441 correct"
"Puzzle 10.1: .0019 seconds, answer 441 correct"
]
},
"execution_count": 46,
@ -2828,7 +2828,7 @@
{
"data": {
"text/plain": [
"Puzzle 10.2: 11.4070 seconds, answer 18559 correct"
"Puzzle 10.2: 3.4554 seconds, answer 18559 correct"
]
},
"execution_count": 48,
@ -2948,7 +2948,7 @@
{
"data": {
"text/plain": [
"Puzzle 10.2: .1177 seconds, answer 18559 correct"
"Puzzle 10.2: .0461 seconds, answer 18559 correct"
]
},
"execution_count": 50,
@ -3211,7 +3211,7 @@
{
"data": {
"text/plain": [
"Puzzle 11.1: .0007 seconds, answer 574 correct"
"Puzzle 11.1: .0003 seconds, answer 574 correct"
]
},
"execution_count": 53,
@ -3327,7 +3327,7 @@
{
"data": {
"text/plain": [
"Puzzle 11.2: .0016 seconds, answer 306594217920240 correct"
"Puzzle 11.2: .0009 seconds, answer 306594217920240 correct"
]
},
"execution_count": 55,
@ -3601,6 +3601,14 @@
" return ok"
]
},
{
"cell_type": "markdown",
"id": "2cbca4d7-773c-4027-8f27-270887180ee1",
"metadata": {},
"source": [
"*Below we see that ChatGPT's code works, but it takes 2 minutes to run:*"
]
},
{
"cell_type": "code",
"execution_count": 57,
@ -3610,7 +3618,7 @@
{
"data": {
"text/plain": [
"Puzzle 12.1: 211.1226 seconds, answer 454 correct"
"Puzzle 12.1: 112.2115 seconds, answer 454 correct"
]
},
"execution_count": 57,
@ -3687,7 +3695,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 58,
"id": "3b3dbe00-a191-412b-b062-edefc9ab4013",
"metadata": {},
"outputs": [],
@ -3886,87 +3894,44 @@
"id": "8aa26008-a652-4860-9c84-5ba4344d32f3",
"metadata": {},
"source": [
"# Summary of Run Times\n",
"# Summary\n",
"\n",
"*Here are the run times and some comments.*\n",
"*Here are the run times, lines-of-code counts, and some comments.*\n",
"\n",
"*The LLM lines-of-code count is 5 times the human count. The LLM run times are roughly double the human-written run times, if we throw out 12.1, where the human noticed the trick and the LLM didn't. But all the other solutions run in under a second, so run time is not a big deal.*"
]
},
{
"cell_type": "markdown",
"id": "49736354-51c0-4870-bc39-f7a2e955e196",
"id": "a27a329a-01f5-4fbc-a3a6-5946bffe859f",
"metadata": {},
"source": [
" DAY LLM Human LLM Human\n",
" #\tLLM Time\tTime LOC LOC\t\n",
" --- ------ ---- ----- --- -----\n",
" 1.1\tGemini\t.001\t.001\t51\t6\tStraightforward and easy for LLM and human.\n",
" 1.2\tGemini\t.002\t.001\t75\t11\tBoth LLM and human erred on the distance from 0 to 0.\n",
" 2.1\tClaude\t.121\t.003\t29\t17\tEasy\n",
" 2.2\tClaude\t.135\t.004\t35\t16\tBoth LLM and human found the more efficient half-digits approach\n",
" 3.1\tChatGPT\t.004\t.001\t22\t11\tEasy\n",
" 3.2\tChatGPT\t.007\t.002\t42\t14\tEasy\n",
" 4.1\tGemini\t.016\t.055\t44\t9\tEasy\n",
" 4.2\tGemini\t.379\t.139\t52\t8\tLLM chose the less efficient scan-whole-grid approach\n",
" 5.1\tClaude\t.005\t.012\t45\t11\tEasy\n",
" 5.2\tClaude\t.001\t.001\t58\t9\tEasy\n",
" 6.1\tChatGPT\t.008\t.002\t67\t7\tEasy; bad “if x: True else: False” idiom by LLM\n",
" 6.2\tChatGPT\t.006\t.006\t87\t27\tEasy; LLM overly verbose\n",
" 7.1\tGemini\t.001\t.001\t63\t13\tEasy\n",
" 7.2\tGemini\t.002\t.002\t70\t11\tEasy\n",
" 8.1\tClaude\t.828\t.583\t91\t27\tEasy\n",
" 8.2\tClaude\t.835\t.618\t82\t11\tEasy; but LLMs Union-Find data type runs slower than mine.\n",
" 9.1\tChatGPT\t.027\t.037\t33\t7\tEasy\n",
" 9.2\tChatGPT\t.771\t.016\t157\t36\tLLM code a bit complicated; human uses “2 point” trick for speedup\n",
" 10.1\tGemini\t.005\t.001\t101\t18\tEasy\n",
" 10.2\tGemini\t.120\t.112\t70\t13\tmilp solutions similar; LLM offers other solutions\n",
" 11.1\tClaude\t.023\t.001\t83\t11\tEasy; LLM has a bit of vestigial code\n",
" 11.2\tClaude\t.001\t.001\t77\t11\tEasy\n",
" 12.1\tChatGPT\t3min\t.002\t238\t20\tHuman saw shortcut to avoid search; LLM wrote search functions\n",
" TOTAL 3.29\t1.60 1672\t324\tOverall, Human code is 5x briefer, 2x faster (even ignoring 12.1)"
]
},
{
"cell_type": "code",
"execution_count": 58,
"id": "1329c562-4721-4084-a7f2-8e8af82986c0",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Puzzle 1.1: .0015 seconds, answer 1182 correct\n",
"Puzzle 1.2: .0018 seconds, answer 6907 correct\n",
"Puzzle 2.1: .1342 seconds, answer 23560874270 correct\n",
"Puzzle 2.2: .1546 seconds, answer 44143124633 correct\n",
"Puzzle 3.1: .0043 seconds, answer 17085 correct\n",
"Puzzle 3.2: .0080 seconds, answer 169408143086082 correct\n",
"Puzzle 4.1: .0174 seconds, answer 1569 correct\n",
"Puzzle 4.2: .1065 seconds, answer 9280 correct\n",
"Puzzle 5.1: .0064 seconds, answer 635 correct\n",
"Puzzle 5.2: .0002 seconds, answer 369761800782619 correct\n",
"Puzzle 6.1: .0079 seconds, answer 5877594983578 correct\n",
"Puzzle 6.2: .0065 seconds, answer 11159825706149 correct\n",
"Puzzle 7.1: .0011 seconds, answer 1681 correct\n",
"Puzzle 7.2: .0025 seconds, answer 422102272495018 correct\n",
"Puzzle 8.1: 1.3418 seconds, answer 24360 correct\n",
"Puzzle 8.2: 1.0359 seconds, answer 2185817796 correct\n",
"Puzzle 9.1: .0333 seconds, answer 4772103936 correct\n",
"Puzzle 9.2: .8590 seconds, answer 1529675217 correct\n",
"Puzzle 10.1: .0044 seconds, answer 441 correct\n",
"Puzzle 10.2: .1177 seconds, answer 18559 correct\n",
"Puzzle 11.1: .0007 seconds, answer 574 correct\n",
"Puzzle 11.2: .0016 seconds, answer 306594217920240 correct\n",
"Puzzle 12.1: 211.1226 seconds, answer 454 correct\n",
"\n",
"Time in seconds: sum = 214.970, mean = 9.347, median = .008, max = 211.123\n"
]
}
],
"source": [
"summary(answers) # Below is the most recent run of this notebook; the chart above is from a previous run."
" | DAY | LLM<br>Name | LLM<br>Time | Human<br>Time | LLM<br>LOC | Human<br>LOC | Comments |\n",
" | --- | ------ | ---- | ----- | --- | ----- | ---|\n",
" | 1.1 | Gemini | .0007 | .0002 | 51 | 6 | Straightforward and easy for LLM and human. | \n",
" | 1.2 | Gemini | .0008 | .0004 | 75 | 11 | Both LLM and human erred on the distance from 0 to 0. | \n",
" | 2.1 | Claude | .0355 | .0001 | 29 | 17 | Easy | \n",
" | 2.2 | Claude | .0403 | .0002 | 35 | 16 | Both LLM and human found the more efficient half-digits approach | \n",
" | 3.1 | ChatGPT | .0019 | .0003 | 22 | 11 | Easy | \n",
" | 3.2 | ChatGPT | .0026 | .0008 | 42 | 14 | Easy | \n",
" | 4.1 | Gemini | .0084 | .0194 | 44 | 9 | Easy | \n",
" | 4.2 | Gemini | .0329 | .0495 | 52 | 8 | LLM chose the less efficient scan-whole-grid approach | \n",
" | 5.1 | Claude | .0029 | .0045 | 45 | 11 | Easy | \n",
" | 5.2 | Claude | .0001 | .0000 | 58 | 9 | Easy | \n",
" | 6.1 | ChatGPT | .0034 | .0008 | 67 | 7 | Easy; bad “if x: True else: False” idiom by LLM | \n",
" | 6.2 | ChatGPT | .0023 | .0013 | 87 | 27 | Easy; LLM overly verbose | \n",
" | 7.1 | Gemini | .0004 | .0003 | 63 | 13 | Easy | \n",
" | 7.2 | Gemini | .0011 | .0007 | 70 | 11 | Easy | \n",
" | 8.1 | Claude | .2886 | .1981 | 91 | 27 | Easy | \n",
" | 8.2 | Claude | .2857 | .2034 | 82 | 11 | Easy; but LLM's Union-Find data type runs slower than mine. | \n",
" | 9.1 | ChatGPT | .0094 | .0187 | 33 | 7 | Easy | \n",
" | 9.2 | ChatGPT | .4376 | .0046 | 157 | 36 | LLM code a bit complicated; human uses “2 point” trick for speedup | \n",
" | 10.1 | Gemini | .0019 | .0242 | 101 | 18 | Easy | \n",
" | 10.2 | Gemini | .0461 | .0680 | 70 | 13 | milp solutions similar; LLM offers other solutions | \n",
" | 11.1 | Claude | .0003 | .0001 | 83 | 11 | Easy; LLM has a bit of vestigial code | \n",
" | 11.2 | Claude | .0009 | .0010 | 77 | 11 | Easy | \n",
" | 12.1 | ChatGPT | 112.2 | .0006 | 238 | 20 | Human saw shortcut to avoid search; LLM wrote search functions | \n",
" | **TOTAL** | |**1.204** | **.597** | **1672** | **324** | **Total time ignores 12.1. Overall, Human code is 5x briefer, 2x faster** | "
]
}
],
@ -3986,7 +3951,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.5"
"version": "3.13.3"
}
},
"nbformat": 4,

File diff suppressed because one or more lines are too long