Morning Consult has a Tracking Trump web page that gives state-by-state, month-by-month presidential approval poll data. Within the web page there is some JavaScript from which we can extract the data we need. It looks like this:
var mc_state_trend = [["Demographic","Jan-17","","Feb-17","", ...]
["Alabama","62","26","65","29", ...],
... ]
The first row is a header (each date is a month at which polls were aggregated). The subsequent rows each start with the state name, followed by the approval and disapproval percentages for each date. That is, if there are 34 dates, there will be 68 numbers. The row shown above is saying that in January, 2017, 62% of Alabamans approved and 26% disapproved; then in February, 2017, 65% approved and 29% disapproved, and so on. Our job is to extract this data and find ways to visualize and understand it.
First fetch the page and save it locally:
! curl -s -o evs.html https://morningconsult.com/tracking-trump-2/
Now some imports:
%matplotlib inline
import matplotlib.pyplot as plt
import re
import ast
from collections import namedtuple
from IPython.display import display, Markdown
from statistics import stdev
Additional data: the variable state_data
contains the electoral votes by state and the partisan lean by state (how much more Republican (plus) or Democratic (minus) leaning the state is compared to the country as a whole, across recent elections).
The variable net_usa
has the country-wide net presidential approval by month.
# From https://github.com/fivethirtyeight/data/tree/master/partisan+lean
# A dict of {"state name": (electoral_votes, partisan_lean)}
# partisan_lean is relative to the country as a whole:
# positive = more Republican-leaning, negative = more Democratic-leaning.
# There are 51 entries: the 50 states plus the District of Columbia.
state_data = {
"Alabama": (9, +27), "Alaska": (3, +15), "Arizona": (11, +9),
"Arkansas": (6, +24), "California": (55, -24), "Colorado": (9, -1),
"Connecticut": (7, -11), "Delaware": (3, -14), "District of Columbia": (3, -43),
"Florida": (29, +5), "Georgia": (16, +12), "Hawaii": (4, -36),
"Idaho": (4, +35), "Illinois": (20, -13), "Indiana": (11, +18),
"Iowa": (6, +6), "Kansas": (6, +23), "Kentucky": (8, +23),
"Louisiana": (8, +17), "Maine": (4, -5), "Maryland": (10, -23),
"Massachusetts": (11, -29), "Michigan": (16, -1), "Minnesota": (10, -2),
"Mississippi": (6, +15), "Missouri": (10, +19), "Montana": (3, +18),
"Nebraska": (5, +24), "Nevada": (6, +1), "New Hampshire": (4, +2),
"New Jersey": (14, -13), "New Mexico": (5, -7), "New York": (29, -22),
"North Carolina": (15, +5), "North Dakota": (3, +33), "Ohio": (18, +7),
"Oklahoma": (7, +34), "Oregon": (7, -9), "Pennsylvania": (20, +1),
"Rhode Island": (4, -26), "South Carolina": (9, +17), "South Dakota": (3, +31),
"Tennessee": (11, +28), "Texas": (38, +17), "Utah": (6, +31),
"Vermont": (3, -24), "Virginia": (13, 0), "Washington": (12, -12),
"West Virginia": (5, +30), "Wisconsin": (10, +1), "Wyoming": (3, +47)}
# From https://projects.fivethirtyeight.com/trump-approval-ratings/
# A dict of {'date': country-wide-net-approval}, taken from 1st of month.
# Keys use the same 'Mon-YY' labels as the header row of the poll table, so the
# dict can be indexed directly with those dates.
# NOTE: entries run from Jan-17 through Feb-20 only; looking up any other date
# raises KeyError.
net_usa = {
'Jan-17': 10, 'Jan-18': -18, 'Jan-19': -12, 'Jan-20': -11,
'Feb-17': 0, 'Feb-18': -15, 'Feb-19': -16, 'Feb-20': -10,
'Mar-17': -6, 'Mar-18': -14, 'Mar-19': -11,
'Apr-17': -13, 'Apr-18': -13, 'Apr-19': -11,
'May-17': -11, 'May-18': -12, 'May-19': -12,
'Jun-17': -16, 'Jun-18': -11, 'Jun-19': -12,
'Jul-17': -15, 'Jul-18': -10, 'Jul-19': -11,
'Aug-17': -19, 'Aug-18': -12, 'Aug-19': -10,
'Sep-17': -20, 'Sep-18': -14, 'Sep-19': -13,
'Oct-17': -17, 'Oct-18': -11, 'Oct-19': -13,
'Nov-17': -19, 'Nov-18': -11, 'Nov-19': -13,
'Dec-17': -18, 'Dec-18': -10, 'Dec-19': -12,
}
Now the code to parse and manipulate the data:
class State(namedtuple('_', ['name', 'ev', 'lean', 'approvals', 'disapprovals'])):
    """The polling record for one state.

    Fields, in order: `name`; `ev`, the number of electoral votes; `lean`, the
    partisan lean; and `approvals` / `disapprovals`, each a dict of
    {date: percent}.
    """
def parse_page(filename, state_data=state_data) -> tuple:
    """Read data from the file and return tuple: (list of `State`s, list of dates).

    The file is expected to contain a JavaScript array literal of the form
    [["Demographic", date, "", date, "", ...], [state, approve, disapprove, ...], ...]
    on a single line. The first such `[[...]]` span in the file is used.

    `state_data` maps each state name to (electoral_votes, partisan_lean).

    Raises IndexError if no `[[...]]` span is found, and KeyError if a state
    named in the table is missing from `state_data`.
    """
    # Use a context manager so the file handle is closed as soon as it is read.
    with open(filename) as page:
        html = page.read()
    text = re.findall(r'\[\[.*?\]\]', html)[0]
    header_row, *table = ast.literal_eval(text)
    # The header alternates date labels with empty strings; keep only the dates.
    dates = header_row[1::2]
    # Each row is the state name followed by approve/disapprove pairs, one pair per date.
    states = [State(name, *state_data[name],
                    approvals=dict(zip(dates, map(int, numbers[0::2]))),
                    disapprovals=dict(zip(dates, map(int, numbers[1::2]))))
              for (name, *numbers) in table]
    return states, dates
# Parse the locally saved page once. `states` and `dates` are module-level values
# that several later functions bind as default arguments at definition time.
states, dates = parse_page('evs.html')
# `now` is the last date in the table's header row (presumably the most recent month).
now = dates[-1]
def EV(states, date=now, swing=0) -> int:
    "Sum the electoral votes of states whose net approval plus `swing` is positive (half credit when it is exactly zero)."
    total = 0
    for state in states:
        shifted = net(state, date) + swing
        total += state.ev * is_positive(shifted)
    return total
def is_positive(x) -> int:
    "Score x by its sign: 1/2 when x equals zero, otherwise 1 if x is above zero and 0 if not."
    if x == 0:
        return 1/2
    return 1 if x > 0 else 0
def margin(states, date=now) -> int:
    "Smallest whole-number swing in range(-50, 50) that, nudged up by 0.1, gives at least 270 electoral votes."
    winning = [points for points in range(-50, 50)
               if EV(states, date, points + 0.1) >= 270]
    return min(winning)
def net(state, date=now) -> int:
    "Net approval on `date`: percent approving minus percent disapproving."
    approve = state.approvals[date]
    disapprove = state.disapprovals[date]
    return approve - disapprove
def undecided(state, date=now) -> int:
    "Percent on `date` that neither approves nor disapproves."
    approve = state.approvals[date]
    disapprove = state.disapprovals[date]
    return 100 - approve - disapprove
def movement(state, date=now) -> float:
    "A fifth of the undecided percent on `date`, plus twice the state's recent standard deviation of net approval."
    from_undecided = undecided(state, date) / 5
    from_volatility = 2 * 𝝈(state)
    return from_undecided + from_volatility
def 𝝈(state, recent=dates[-12:]) -> float:
    "Sample standard deviation of the state's net approval over the `recent` dates."
    nets = [net(state, month) for month in recent]
    return stdev(nets)
def is_swing(state) -> bool:
    "True when the size of the state's current net approval is smaller than its expected movement."
    gap = abs(net(state))
    reach = movement(state)
    return gap < reach
Various functions for displaying data:
def header(head) -> str:
    "Return `head` followed by a newline and a Markdown rule row with one '|' per '|' in `head`, joined by '-'."
    bars = ['|'] * head.count('|')
    rule = '-'.join(bars)
    return head + '\n' + rule
def markdown(fn) -> callable:
    """Decorator: make `fn`, which yields lines of Markdown, display them when called.

    The wrapped function joins the strings produced by `fn` with newlines and
    passes the result to `display(Markdown(...))`. Positional and keyword
    arguments are both forwarded to `fn`, and the wrapper keeps `fn`'s name and
    docstring.
    """
    import functools  # local import keeps this block self-contained

    @functools.wraps(fn)
    def wrapper(*args, **kwargs):
        return display(Markdown('\n'.join(fn(*args, **kwargs))))
    return wrapper
def parp(state, date=now) -> int:
    "Popularity Above Replacement President: net approval on `date` minus the state's partisan lean."
    approval_gap = net(state, date)
    return approval_gap - state.lean
def grid(dates, xlab, ylab):
    "Finish the current plot: dotted minor y-grid, one rotated x tick per date, axis labels, and a legend."
    plt.minorticks_on()
    plt.grid(which='minor', axis='y', ls=':', alpha=0.7)
    positions = range(len(dates))
    plt.xticks(positions, dates, rotation=90)
    plt.xlabel(xlab)
    plt.ylabel(ylab)
    plt.legend()
def show_evs(states=states, dates=dates, swing=3):
    """A plot of electoral votes by month.

    For each date, plots the electoral-vote total given by `EV`, with error
    bars running from the total under a `-swing` shift to the total under a
    `+swing` shift, alongside a horizontal line at 270 electoral votes.
    """
    plt.rcParams["figure.figsize"] = [10, 7]
    plt.style.use('fivethirtyeight')
    N = len(dates)
    # Compute the unshifted totals once; they serve as both the line and the error-bar baseline.
    evs = [EV(states, date) for date in dates]
    err = [[ev - EV(states, date, -swing) for ev, date in zip(evs, dates)],
           [EV(states, date, +swing) - ev for ev, date in zip(evs, dates)]]
    plt.plot(range(N), [270] * N, color='darkorange', label="270 EVs", lw=2)
    # The legend reports the swing actually used rather than a fixed 3%.
    plt.errorbar(range(N), evs, fmt='D-',
                 yerr=err, ecolor='grey', capsize=7,
                 label=f'Trump EVs ±{swing}% swing', lw=2)
    grid(dates, 'Date', 'Electoral Votes')
def show_approval(states=states, dates=dates):
    "Plot, by month: a zero line, the negated margin to 270 electoral votes, and the country-wide net approval."
    plt.rcParams["figure.figsize"] = [10, 7]
    plt.style.use('fivethirtyeight')
    count = len(dates)
    xs = range(count)
    plt.plot(xs, [0] * count, label='Net zero', color='darkorange')
    to_270 = [-margin(states, date) for date in dates]
    plt.plot(xs, to_270, 'D-', label='Margin to 270')
    national = [net_usa[date] for date in dates]
    plt.plot(xs, national, 'go-', label='Country-wide Net')
    grid(dates, 'Date', 'Net popularity')
def show_swings(swings=range(10)):
    "Print, for each swing value plus 0.5, the electoral-vote totals under a negative and a positive swing of that size."
    print('Swing EV Range')
    for whole in swings:
        pct = whole + 0.5
        low = EV(states, swing=-pct)
        high = EV(states, swing=pct)
        print(f'±{pct:3.1f}% {low:3} to {high:3}')
@markdown
def show_states(states=states, d=now, ref='Jan-17'):
    """A table of states, sorted by net approval on date `d`, with electoral votes.

    Columns: state name, net approval, expected movement, electoral votes,
    running total of electoral votes, approval, disapproval, undecided, and the
    recent standard deviation of net approval. Every dated column is taken at
    `d`, and rows are ordered by net approval at `d`.

    Swing-state highlighting (upper-case bold name, bold Net and Move cells)
    comes from `is_swing`, which takes no date argument.

    `ref` is accepted for backward compatibility and is not used.
    """
    total = 0
    yield header('|State|Net|Move|EV|ΣEV|+|−|?|𝝈|')
    # Sort on the same date that is displayed, so order and Net column agree.
    for s in sorted(states, key=lambda state: net(state, d), reverse=True):
        total += s.ev
        b = '**' * is_swing(s)
        yield (f'|{swing_name(s)}|{b}{net(s, d):+d}%{b}|{b}±{movement(s, d):.0f}%{b}|{s.ev}|{total}'
               f'|{s.approvals[d]}%|{s.disapprovals[d]}%|{undecided(s, d)}%|±{𝝈(s):3.1f}%')
def swing_name(s) -> str:
    "The state's name, upper-cased and wrapped in '**' when `is_swing` says it is a swing state."
    if is_swing(s):
        return '**' + s.name.upper() + '**'
    return s.name
@markdown
def show_parp(states=states, dates=(now, 'Jan-19', 'Jan-18', 'Jan-17')):
    "A table of states ordered by current Popularity Above Replacement President, with PARP and net approval for each date."
    def year(date):
        # The current date gets no suffix; other dates get an apostrophe and their last two characters.
        if date == now:
            return ''
        return "'" + date[-2:]

    columns = "|".join(f"PARP{year(date)}|(Net)" for date in dates)
    yield header(f'|State|Lean|EV|{columns}|')
    for s in sorted(states, key=parp, reverse=True):
        cells = "|".join(f'{parp(s, date):+d}|({net(s, date):+d})' for date in dates)
        yield f'|{swing_name(s)}|{s.lean:+d}|{s.ev}|{cells}|'
Tests (I really should have more):
# Sanity checks on the parsed table: entry count, per-state minimum, and grand total.
assert len(states) == 51, "50 states plus DC"
assert not any(s.ev < 3 for s in states), "All states have two senators and at least one rep."
assert sum([s.ev for s in states]) == 538, "Total of 538 electoral votes."