Add files via upload
This commit is contained in:
parent
11766b5972
commit
03a0fd3abd
File diff suppressed because one or more lines are too long
@ -8,8 +8,18 @@
|
||||
"\n",
|
||||
"# Data and Code for [Tracking Trump: Electoral Votes Edition](Electoral%20Votes.ipynb)\n",
|
||||
"\n",
|
||||
"First fetch the state-by-state, month-by-month approval data from the **[Tracking Trump](https://morningconsult.com/tracking-trump/)** web page at *Morning Consult*\n",
|
||||
" and cache it locally: "
|
||||
" *Morning Consult* has a **[Tracking Trump](https://morningconsult.com/tracking-trump/)** web page that\n",
|
||||
" gives state-by-state, month-by-month presidential approval poll data. Within the web page there is some JavaScript from which\n",
|
||||
" we can extract the data we need. It looks like this:\n",
|
||||
"\n",
|
||||
"    var mc_state_trend = [[\"Demographic\",\"January 1, 2017\",\"February 1, 2017\", ...],\n",
|
||||
" [\"Alabama\",\"62\",\"26\",\"65\",\"29\", ...], \n",
|
||||
" ... ]\n",
|
||||
" \n",
|
||||
"The first row is a header (each date is a day on which polls were aggregated).\n",
|
||||
"The subsequent rows each start with the state name, followed by the approval and disapproval percentages for each date. That is, if there are 34 dates, there will be 68 numbers. The row shown above is saying that on January 1, 2017, 62% of Alabamans approved and 26% disapproved; then on February 1, 2017, 65% approved and 29% disapproved, and so on. Our job is to extract this data and find ways to visualize and understand it.\n",
|
||||
"\n",
|
||||
"First fetch the page and save it locally:"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -80,16 +90,19 @@
|
||||
"# From https://projects.fivethirtyeight.com/trump-approval-ratings/\n",
|
||||
"# A dict of {'date': country-wide-net-approval}\n",
|
||||
"net_usa = {\n",
|
||||
" '1-Jan-17': +10, \n",
|
||||
" '1-Feb-17': 0, '1-Mar-17': -6, '1-Apr-17': -13, '1-May-17': -11,\n",
|
||||
" '1-Jun-17': -16, '1-Jul-17': -15, '1-Aug-17': -19, '1-Sep-17': -20,\n",
|
||||
" '1-Oct-17': -17, '1-Nov-17': -19, '1-Dec-17': -18, '1-Jan-18': -18,\n",
|
||||
" '1-Feb-18': -15, '1-Mar-18': -14, '1-Apr-18': -13, '1-May-18': -12,\n",
|
||||
" '1-Jun-18': -11, '1-Jul-18': -10, '1-Aug-18': -12, '1-Sep-18': -14,\n",
|
||||
" '1-Oct-18': -11, '1-Nov-18': -11, '1-Dec-18': -10, '1-Jan-19': -12,\n",
|
||||
" '1-Feb-19': -16, '1-Mar-19': -11, '1-Apr-19': -11, '1-May-19': -12,\n",
|
||||
" '1-Jun-19': -12, '1-Jul-19': -11, '1-Aug-19': -10, '1-Sep-19': -13,\n",
|
||||
" '1-Oct-19': -13}\n"
|
||||
" 'January 2017': 10, 'January 2018': -18, 'January 2019': -12, \n",
|
||||
" 'February 2017': 0, 'February 2018': -15, 'February 2019': -16, \n",
|
||||
" 'March 2017': -6, 'March 2018': -14, 'March 2019': -11, \n",
|
||||
" 'April 2017': -13, 'April 2018': -13, 'April 2019': -11, \n",
|
||||
" 'May 2017': -11, 'May 2018': -12, 'May 2019': -12, \n",
|
||||
" 'June 2017': -16, 'June 2018': -11, 'June 2019': -12, \n",
|
||||
" 'July 2017': -15, 'July 2018': -10, 'July 2019': -11, \n",
|
||||
" 'August 2017': -19, 'August 2018': -12, 'August 2019': -10, \n",
|
||||
" 'September 2017': -20, 'September 2018': -14, 'September 2019': -13, \n",
|
||||
" 'October 2017': -17, 'October 2018': -11, 'October 2019': -13, \n",
|
||||
" 'November 2017': -19, 'November 2018': -11, 'November 2019': -10,\n",
|
||||
" 'December 2017': -18, 'December 2018': -10\n",
|
||||
" }"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -107,42 +120,95 @@
|
||||
"source": [
|
||||
"class State(namedtuple('_', 'name, ev, lean, approvals, disapprovals')):\n",
|
||||
" '''A State has a name, the number of electoral votes, the partisan lean,\n",
|
||||
" and two dicts of {date: percent}: approvals and disapprovals'''\n",
|
||||
" and two dicts of {date: percent}: approvals and disapprovals.'''\n",
|
||||
"\n",
|
||||
"def parse_page(filename='evs.html', data=state_data):\n",
|
||||
" \"Read data from the file and return (list of dates, list of `State`s, last date).\"\n",
|
||||
" # File format: Date headers, then [state, approval, disapproval ...]\n",
|
||||
" # [[\"Demographic\",\"1-Jan-17\",\"\",\"1-Feb-17\",\"\", ... \"1-Apr-19\",\"\"],\n",
|
||||
" # [\"Alabama\",\"62\",\"26\",\"65\",\"29\", ... \"61\",\"35\"], ... ] =>\n",
|
||||
" # State(\"Alabama\", 9, +27, approvals={\"1-Jan-17\": 62, ...}, \n",
|
||||
" # disapprovals={\"1-Jan-17\": 26, ...}), ...\n",
|
||||
"def parse_page(filename, state_data=state_data) -> tuple:\n",
|
||||
" \"Read data from the file and return tuple: (list of `State`s, list of dates).\"\n",
|
||||
" text = re.findall(r'\\[\\[.*?\\]\\]', open(filename).read())[0]\n",
|
||||
" header, *table = ast.literal_eval(text)\n",
|
||||
" dates = header[1::2] # Every other header entry is a date\n",
|
||||
" states = [State(name, *data[name],\n",
|
||||
" dates = header[1::2]\n",
|
||||
" states = [State(name, *state_data[name],\n",
|
||||
" approvals=dict(zip(dates, map(int, numbers[0::2]))),\n",
|
||||
" disapprovals=dict(zip(dates, map(int, numbers[1::2]))))\n",
|
||||
" for (name, *numbers) in table]\n",
|
||||
" return states, dates, dates[-1]\n",
|
||||
" return states, dates\n",
|
||||
"\n",
|
||||
"states, dates, now = parse_page()\n",
|
||||
"states, dates = parse_page('evs.html')\n",
|
||||
"now = dates[-1]\n",
|
||||
"\n",
|
||||
"def EV(states, date=now, swing=0) -> int:\n",
|
||||
" \"Total electoral votes with net positive approval (plus half the votes for net zero).\"\n",
|
||||
" return sum(s.ev * (1/2 if net(s, date) + swing == 0 else int(net(s, date) + swing > 0))\n",
|
||||
" for s in states)\n",
|
||||
" \"Total electoral votes of states with net positive approval (plus half for net zero).\"\n",
|
||||
" return sum(s.ev * is_positive(net(s, date) + swing) for s in states)\n",
|
||||
"\n",
|
||||
"def is_positive(x) -> int:\n",
|
||||
" \"1 if x is positive; 0 if x is negative; 1/2 if x is zero.\"\n",
|
||||
" return 1/2 if x == 0 else int(x > 0)\n",
|
||||
"\n",
|
||||
"def margin(states, date=now) -> int:\n",
|
||||
" \"What's the least swing that would lead to a majority?\"\n",
|
||||
" return next(swing for swing in range(-50, 50) if EV(states, date, swing) >= 270)\n",
|
||||
" return min(swing for swing in range(-50, 50) if EV(states, date, swing) >= 270)\n",
|
||||
"\n",
|
||||
"def net(state, date=now) -> int: return state.approvals[date] - state.disapprovals[date]\n",
|
||||
"def undecided(state, date=now) -> int: return 100 - state.approvals[date] - state.disapprovals[date]\n",
|
||||
"def movement(state, date=now) -> float: return undecided(state, date) / 5 + 2 * 𝝈(state)\n",
|
||||
"def 𝝈(state, recent=dates[-12:]) -> float: return stdev(net(state, d) for d in recent)\n",
|
||||
"def 𝝈(state, recent=dates[-18:]) -> float: return stdev(net(state, d) for d in recent)\n",
|
||||
"def is_swing(state) -> bool: return abs(net(state)) < movement(state)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"['January 2017',\n",
|
||||
" 'February 2017',\n",
|
||||
" 'March 2017',\n",
|
||||
" 'April 2017',\n",
|
||||
" 'May 2017',\n",
|
||||
" 'June 2017',\n",
|
||||
" 'July 2017',\n",
|
||||
" 'August 2017',\n",
|
||||
" 'September 2017',\n",
|
||||
" 'October 2017',\n",
|
||||
" 'November 2017',\n",
|
||||
" 'December 2017',\n",
|
||||
" 'January 2018',\n",
|
||||
" 'February 2018',\n",
|
||||
" 'March 2018',\n",
|
||||
" 'April 2018',\n",
|
||||
" 'May 2018',\n",
|
||||
" 'June 2018',\n",
|
||||
" 'July 2018',\n",
|
||||
" 'August 2018',\n",
|
||||
" 'September 2018',\n",
|
||||
" 'October 2018',\n",
|
||||
" 'November 2018',\n",
|
||||
" 'December 2018',\n",
|
||||
" 'January 2019',\n",
|
||||
" 'February 2019',\n",
|
||||
" 'March 2019',\n",
|
||||
" 'April 2019',\n",
|
||||
" 'May 2019',\n",
|
||||
" 'June 2019',\n",
|
||||
" 'July 2019',\n",
|
||||
" 'August 2019',\n",
|
||||
" 'September 2019',\n",
|
||||
" 'October 2019',\n",
|
||||
" 'November 2019']"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"dates"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@ -152,11 +218,11 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def labels(xlab, ylab): plt.xlabel(xlab); plt.ylabel(ylab); plt.grid(True); plt.legend()\n",
|
||||
"def labels(xlab, ylab): plt.xlabel(xlab); plt.ylabel(ylab); plt.legend()\n",
|
||||
"\n",
|
||||
"def grid(): plt.minorticks_on(); plt.grid(which='minor', ls=':', alpha=0.7)\n",
|
||||
" \n",
|
||||
@ -169,16 +235,17 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def show_months(states=states, dates=dates, swing=3):\n",
|
||||
"def show_evs(states=states, dates=dates, swing=3):\n",
|
||||
" \"A plot of electoral votes by month.\"\n",
|
||||
" plt.rcParams[\"figure.figsize\"] = [10, 7]\n",
|
||||
" plt.style.use('fivethirtyeight')\n",
|
||||
" N = len(dates)\n",
|
||||
" err = [[EV(states, date) - EV(states, date, -swing) for date in dates],\n",
|
||||
" [EV(states, date, swing) - EV(states, date) for date in dates]]\n",
|
||||
" [EV(states, date, +swing) - EV(states, date) for date in dates]]\n",
|
||||
" grid()\n",
|
||||
" plt.plot(range(N), [270] * N, color='darkorange', label=\"270 EVs\", lw=2)\n",
|
||||
" plt.errorbar(range(N), [EV(states, date) for date in dates], fmt='D-',\n",
|
||||
@ -188,11 +255,12 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def show_approval(states=states, dates=dates):\n",
|
||||
" \"A plot of net popularity by month.\"\n",
|
||||
" plt.rcParams[\"figure.figsize\"] = [10, 7]\n",
|
||||
" plt.style.use('fivethirtyeight')\n",
|
||||
" N = len(dates)\n",
|
||||
@ -203,22 +271,6 @@
|
||||
" labels('Months into term', 'Net popularity')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"@markdown\n",
|
||||
"def by_month(states, dates=dates[::-1]):\n",
|
||||
" yield header('|Month|EVs|Margin|Country|Undecided|')\n",
|
||||
" for date in dates:\n",
|
||||
" month = date.replace('1-', '').replace('-', ' 20')\n",
|
||||
" yield (f'|{month}|{int(EV(states, date))}|{margin(states, date)}%|{net_usa[date]:+d}%'\n",
|
||||
" f'|{sum(s.ev * undecided(s, date) for s in states) / 538:.0f}% '\n",
|
||||
" f'({sum(undecided(s, date) > 5 for s in states)} states)')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
@ -228,15 +280,15 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"@markdown\n",
|
||||
"def show_states(states=states, d=now, ref='1-Jan-17'):\n",
|
||||
"def show_states(states=states, d=now, ref='January 2017'):\n",
|
||||
" \"A table of states, sorted by net approval, with electoral votes.\"\n",
|
||||
" total = 0\n",
|
||||
" yield header(f'|State|Net|Move|EV|ΣEV|+|−|?|𝝈|Δ|')\n",
|
||||
" yield header(f'|State|Net|Move|EV|ΣEV|+|−|?|𝝈|')\n",
|
||||
" for s in sorted(states, key=net, reverse=True):\n",
|
||||
" total += s.ev\n",
|
||||
" b = '**' * is_swing(s)\n",
|
||||
" yield (f'|{swing_name(s)}|{b}{net(s, d):+d}%{b}|{b}±{movement(s):.0f}%{b}|{s.ev}|{total}'\n",
|
||||
" f'|{s.approvals[d]}%|{s.disapprovals[d]}%|{undecided(s, now)}%|±{𝝈(s):3.1f}%'\n",
|
||||
" f'|{net(s, d) - net(s, ref):+d}%|')\n",
|
||||
" f'|{s.approvals[d]}%|{s.disapprovals[d]}%|{undecided(s, now)}%|±{𝝈(s):3.1f}%')\n",
|
||||
" \n",
|
||||
"def swing_name(s) -> str: return ('**' + s.name.upper() + '**') if is_swing(s) else s.name"
|
||||
]
|
||||
@ -248,12 +300,13 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"@markdown\n",
|
||||
"def show_parp(states=states, dates=(now, '1-Jan-19', '1-Jan-18', '1-Jan-17')):\n",
|
||||
" def year(date): return '' if date == now else date[-2:]\n",
|
||||
" fields = [f'PARP {year(date)}|Net {year(date)}' for date in dates]\n",
|
||||
"def show_parp(states=states, dates=(now, 'January 2019', 'January 2018', 'January 2017')):\n",
|
||||
" \"A table of states, sorted by Popularity Above Replacement President.\"\n",
|
||||
" def year(date): return '' if date == now else \"'\" + date[-2:]\n",
|
||||
" fields = [f\"PARP{year(date)}|(Net)\" for date in dates]\n",
|
||||
" yield header(f'|State|Lean|EV|{\"|\".join(fields)}|')\n",
|
||||
" for s in sorted(states, key=parp, reverse=True):\n",
|
||||
" fields = [f'{parp(s, date):+d}|{net(s, date):+d}' for date in dates]\n",
|
||||
" fields = [f'{parp(s, date):+d}|({net(s, date):+d})' for date in dates]\n",
|
||||
" yield f'|{swing_name(s)}|{s.lean:+d}|{s.ev}|{\"|\".join(fields)}|'"
|
||||
]
|
||||
},
|
||||
@ -261,7 +314,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"I really should have some more tests."
|
||||
"**Tests** (I really should have more)."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user