notebook.community

Edit and run



In [1]:

    
from bs4 import BeautifulSoup
import pandas as pd
import csv
import numpy as np
import seaborn as sns
import time
import matplotlib.pyplot as plt
import requests
plt.style.use('fivethirtyeight')
%matplotlib inline









    



/Users/Esh/anaconda/lib/python2.7/site-packages/matplotlib/font_manager.py:273: UserWarning: Matplotlib is building the font cache using fc-list. This may take a moment.
  warnings.warn('Matplotlib is building the font cache using fc-list. This may take a moment.')
/Users/Esh/anaconda/lib/python2.7/site-packages/matplotlib/__init__.py:872: UserWarning: axes.color_cycle is deprecated and replaced with axes.prop_cycle; please use the latter.
  warnings.warn(self.msg_depr % (key, alt_key))



In [2]:

    
#Scrape data (thanks 538!)
link = requests.get('http://projects.fivethirtyeight.com/election-2016/delegate-targets/democrats/').text
soup = BeautifulSoup(link)
table = soup.html.body.table
rows = table.tbody.find_all('tr')
data = []
for row in rows:
    cols = row.find_all('td')
    cols = [elem.text.strip() for elem in cols if elem]
    data.append(cols)









    



/Users/Esh/anaconda/lib/python2.7/site-packages/bs4/__init__.py:166: UserWarning: No parser was explicitly specified, so I'm using the best available HTML parser for this system ("lxml"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.

To get rid of this warning, change this:

 BeautifulSoup([your markup])

to this:

 BeautifulSoup([your markup], "lxml")

  markup_type=markup_type))



In [3]:

    
#Convert to data frame and clean up
D = pd.DataFrame(data,columns = ['Date','State','Total','Clinton','Sanders'])
D['Clinton'] = D['Clinton'].apply(lambda x: x.split('/')[0])
D['Sanders'] = D['Sanders'].apply(lambda x: x.split('/')[0])
D = D.applymap(lambda x: np.nan if not x else x)
D['Date'] = D['Date'].fillna(method='ffill').apply(lambda x: x.replace('Feb.','February') if 'Feb' in x else x) + ' 2016'
D[['Total','Clinton','Sanders']] = D[['Total','Clinton','Sanders']].astype(float)
#Handle Missouri like the NYT
D.set_value(25,'Total',64)
D.set_value(25,'Clinton',32)
D.set_value(25,'Sanders',32)









    Out[3]:






  
    
      
      Date
      State
      Total
      Clinton
      Sanders
    
  
  
    
      0
      February 1 2016
      Iowa
      44
      23
      21
    
    
      1
      February 9 2016
      New Hampshire
      24
      9
      15
    
    
      2
      February 20 2016
      Nevada
      35
      20
      15
    
    
      3
      February 27 2016
      South Carolina
      53
      39
      14
    
    
      4
      March 1 2016
      Alabama
      53
      44
      9
    
    
      5
      March 1 2016
      American Samoa
      6
      4
      2
    
    
      6
      March 1 2016
      Arkansas
      32
      22
      10
    
    
      7
      March 1 2016
      Colorado
      66
      28
      38
    
    
      8
      March 1 2016
      Georgia
      102
      74
      28
    
    
      9
      March 1 2016
      Massachusetts
      91
      46
      45
    
    
      10
      March 1 2016
      Minnesota
      77
      31
      46
    
    
      11
      March 1 2016
      Oklahoma
      38
      17
      21
    
    
      12
      March 1 2016
      Tennessee
      67
      44
      23
    
    
      13
      March 1 2016
      Texas
      222
      148
      74
    
    
      14
      March 1 2016
      Vermont
      16
      0
      16
    
    
      15
      March 1 2016
      Virginia
      95
      62
      33
    
    
      16
      March 5 2016
      Kansas
      33
      9
      24
    
    
      17
      March 5 2016
      Louisiana
      51
      37
      14
    
    
      18
      March 5 2016
      Nebraska
      25
      10
      15
    
    
      19
      March 6 2016
      Maine
      25
      9
      16
    
    
      20
      March 8 2016
      Michigan
      130
      63
      67
    
    
      21
      March 8 2016
      Mississippi
      36
      32
      4
    
    
      22
      March 12 2016
      Northern Marianas
      6
      4
      2
    
    
      23
      March 15 2016
      Florida
      214
      124
      60
    
    
      24
      March 15 2016
      Illinois
      156
      66
      64
    
    
      25
      March 15 2016
      Missouri
      64
      32
      32
    
    
      26
      March 15 2016
      North Carolina
      107
      61
      46
    
    
      27
      March 15 2016
      Ohio
      143
      74
      56
    
    
      28
      March 22 2016
      Arizona
      75
      NaN
      NaN
    
    
      29
      March 22 2016
      Idaho
      23
      NaN
      NaN
    
    
      30
      March 22 2016
      Utah
      33
      NaN
      NaN
    
    
      31
      March 26 2016
      Alaska
      16
      NaN
      NaN
    
    
      32
      March 26 2016
      Hawaii
      25
      NaN
      NaN
    
    
      33
      March 26 2016
      Washington
      101
      NaN
      NaN
    
    
      34
      April 5 2016
      Wisconsin
      86
      NaN
      NaN
    
    
      35
      April 9 2016
      Wyoming
      14
      NaN
      NaN
    
    
      36
      April 19 2016
      New York
      247
      NaN
      NaN
    
    
      37
      April 26 2016
      Connecticut
      55
      NaN
      NaN
    
    
      38
      April 26 2016
      Delaware
      21
      NaN
      NaN
    
    
      39
      April 26 2016
      Maryland
      95
      NaN
      NaN
    
    
      40
      April 26 2016
      Pennsylvania
      189
      NaN
      NaN
    
    
      41
      April 26 2016
      Rhode Island
      24
      NaN
      NaN
    
    
      42
      May 3 2016
      Indiana
      83
      NaN
      NaN
    
    
      43
      May 7 2016
      Guam
      7
      NaN
      NaN
    
    
      44
      May 10 2016
      West Virginia
      29
      NaN
      NaN
    
    
      45
      May 14 2016
      Democrats Abroad
      13
      NaN
      NaN
    
    
      46
      May 17 2016
      Kentucky
      55
      NaN
      NaN
    
    
      47
      May 17 2016
      Oregon
      61
      NaN
      NaN
    
    
      48
      June 4 2016
      Virgin Islands
      7
      NaN
      NaN
    
    
      49
      June 5 2016
      Puerto Rico
      60
      NaN
      NaN
    
    
      50
      June 7 2016
      California
      475
      NaN
      NaN
    
    
      51
      June 7 2016
      Montana
      21
      NaN
      NaN
    
    
      52
      June 7 2016
      New Jersey
      126
      NaN
      NaN
    
    
      53
      June 7 2016
      New Mexico
      34
      NaN
      NaN
    
    
      54
      June 7 2016
      South Dakota
      20
      NaN
      NaN
    
    
      55
      June 7 2016
      North Dakota
      18
      NaN
      NaN
    
    
      56
      June 14 2016
      District of Columbia
      20
      NaN
      NaN



In [4]:

    
#Computer some diffs and a regression line
D['Diff'] = D['Clinton'] - D['Sanders']
D[['CumTotal','CumClinton','CumSanders']] = D[['Total','Clinton','Sanders']].cumsum()
D['CumDiff'] = D['CumClinton'] - D['CumSanders']
totalDelegates = D['CumTotal'].max()
D['PercClinton'] = D['CumClinton'].apply(lambda x: x/totalDelegates*100)
D['PercSanders'] = D['CumSanders'].apply(lambda x: x/totalDelegates*100)
D['PercDiff'] = D['CumDiff']/totalDelegates*100
D['NormPercDiff'] = D['Diff']/D['Total']*100
D['NormClinton'] = D['Clinton']/D['Total']*100
D['NormSanders'] = D['Sanders']/D['Total']*100



In [576]:

    
toNow = D[[time.localtime() > time.strptime(elem,'%B %d %Y') for elem in D['Date']]]
diffs = toNow.ix[:,'PercDiff'].values
slope, intercept = np.polyfit(range(len(diffs)),diffs,1)
fits = slope * np.arange(len(diffs)) + intercept
normdiffs = toNow.ix[pd.notnull(toNow['NormPercDiff']),'NormPercDiff'].values


fig, axes = plt.subplots(2,1, figsize=(30,16),sharex=True)

axes[0].plot(toNow.index,toNow['PercClinton'],'cornflowerblue',
         toNow.index,toNow['PercSanders'],'mediumseagreen',
         toNow.index,toNow['PercDiff'],'k--', linewidth=4);
axes[0].plot(np.arange(len(diffs)),fits,'firebrick',alpha=.6);
axes[0].yaxis.set_tick_params(labelsize=24);
axes[0].set_ylabel('Percent of All \n Possible Pledged Delegates',fontsize=26);
axes[0].set_ylim([-1,50]);
axes[0].legend(('Clinton','Sanders','Difference','Linear Fit'),fontsize=22);
axes[0].set_title('Cumulative Percent of Delegates',fontsize=30,style='italic')

axes[1].plot(toNow.index,toNow['NormPercDiff'],'k--', linewidth=4);
axes[1].axhline(color='black',linewidth=2);
sns.regplot(x=np.arange(len(diffs)),y=normdiffs,ax=axes[1],scatter=False,lowess=True,color='firebrick',line_kws={'alpha':.6,'linewidth':4})
axes[1].yaxis.set_tick_params(labelsize=24);
axes[1].set_ylabel('Sanders more \t No Diff \t Clinton more'.expandtabs(),fontsize=26);
axes[1].set_ylim([-100,100]);
axes[1].set_yticklabels([100,50,0,50,100]);
axes[1].legend(('Difference','Lowess Fit'),fontsize=22);
axes[1].get_legend().legendHandles[1].set_color('firebrick');
plt.xticks(range(len(toNow)),toNow['State'].apply(lambda x: x.replace(' ','\n')),rotation=45,fontsize=24,multialignment='center');

for i,label in enumerate(axes[1].get_xticklabels()):
    if toNow.ix[i,'NormPercDiff'] > 0:
        label.set_color('cornflowerblue')
    else:
        label.set_color('mediumseagreen')
    if i == 25:
        label.set_color('black')
plt.tight_layout()
axes[1].set_title('Percent Difference Within State',fontsize=30,style='italic')
plt.suptitle("        The Battleground",fontsize=45,y=1.03,fontweight='bold');
plt.savefig('difference.png',bbox_inches='tight')



In [549]:

    
#toNow = D[[time.localtime() > time.strptime(elem,'%B %d %Y') for elem in D['Date']]]
toNow = D
#diffs = toNow.ix[:,'PercDiff'].values
#slope, intercept = np.polyfit(range(len(diffs)),diffs,1)
#fits = slope * np.arange(len(diffs)) + intercept
#normdiffs = toNow.ix[pd.notnull(toNow['NormPercDiff']),'NormPercDiff'].values

fig, axes = plt.subplots(2,1, figsize=(60,20),sharex=True)

axes[0].plot(toNow.index,toNow['PercClinton'],'cornflowerblue',
         toNow.index,toNow['PercSanders'],'mediumseagreen',
         toNow.index,toNow['PercDiff'],'k--', linewidth=4);
#axes[0].plot(np.arange(len(diffs)),fits,'firebrick',alpha=.6);
axes[0].yaxis.set_tick_params(labelsize=24);
axes[0].set_ylabel('Percent of All \n Possible Pledged Delegates',fontsize=30);
axes[0].set_ylim([-1,50]);
axes[0].set_xlim([0,56]);
axes[0].legend(('Clinton','Sanders','Difference','Linear Fit'),fontsize=22);
axes[0].set_title('Cumulative Percent of Total Delegates',fontsize=30,style='italic')

axes[1].plot(toNow.index,toNow['NormPercDiff'],'k--', linewidth=4);
axes[1].axhline(color='black',linewidth=2);
#sns.regplot(x=np.arange(len(diffs)),y=normdiffs,ax=axes[1],scatter=False,lowess=True,color='firebrick',line_kws={'alpha':.6,'linewidth':4})
axes[1].yaxis.set_tick_params(labelsize=24);
axes[1].set_ylabel('Sanders more \t\t\t No Difference \t\t\t Clinton more'.expandtabs(),fontsize=26);
axes[1].set_ylim([-100,100]);
axes[1].set_yticklabels([100,50,0,50,100]);
axes[1].legend(('Difference','Lowess Fit'),fontsize=22);
axes[1].get_legend().legendHandles[1].set_color('firebrick');
plt.xticks(range(len(toNow)),toNow['State'].apply(lambda x: x.replace(' ','\n')),rotation=45,fontsize=24,multialignment='center');

for i,label in enumerate(axes[1].get_xticklabels()):
    if toNow.ix[i,'NormPercDiff'] > 0:
        label.set_color('cornflowerblue')
    else:
        label.set_color('mediumseagreen')
    if i == 25 or i > 28:
        label.set_color('black')
plt.tight_layout()
axes[1].set_title('Percent Difference Within State',fontsize=30,style='italic')
plt.suptitle("     It's Not Over",fontsize=65,y=1.05,fontdict={'weight':'bold'});
plt.savefig('notOver.png',bbox_inches='tight')

	Date	State	Total	Clinton	Sanders
0	February 1 2016	Iowa	44	23	21
1	February 9 2016	New Hampshire	24	9	15
2	February 20 2016	Nevada	35	20	15
3	February 27 2016	South Carolina	53	39	14
4	March 1 2016	Alabama	53	44	9
5	March 1 2016	American Samoa	6	4	2
6	March 1 2016	Arkansas	32	22	10
7	March 1 2016	Colorado	66	28	38
8	March 1 2016	Georgia	102	74	28
9	March 1 2016	Massachusetts	91	46	45
10	March 1 2016	Minnesota	77	31	46
11	March 1 2016	Oklahoma	38	17	21
12	March 1 2016	Tennessee	67	44	23
13	March 1 2016	Texas	222	148	74
14	March 1 2016	Vermont	16	0	16
15	March 1 2016	Virginia	95	62	33
16	March 5 2016	Kansas	33	9	24
17	March 5 2016	Louisiana	51	37	14
18	March 5 2016	Nebraska	25	10	15
19	March 6 2016	Maine	25	9	16
20	March 8 2016	Michigan	130	63	67
21	March 8 2016	Mississippi	36	32	4
22	March 12 2016	Northern Marianas	6	4	2
23	March 15 2016	Florida	214	124	60
24	March 15 2016	Illinois	156	66	64
25	March 15 2016	Missouri	64	32	32
26	March 15 2016	North Carolina	107	61	46
27	March 15 2016	Ohio	143	74	56
28	March 22 2016	Arizona	75	NaN	NaN
29	March 22 2016	Idaho	23	NaN	NaN
30	March 22 2016	Utah	33	NaN	NaN
31	March 26 2016	Alaska	16	NaN	NaN
32	March 26 2016	Hawaii	25	NaN	NaN
33	March 26 2016	Washington	101	NaN	NaN
34	April 5 2016	Wisconsin	86	NaN	NaN
35	April 9 2016	Wyoming	14	NaN	NaN
36	April 19 2016	New York	247	NaN	NaN
37	April 26 2016	Connecticut	55	NaN	NaN
38	April 26 2016	Delaware	21	NaN	NaN
39	April 26 2016	Maryland	95	NaN	NaN
40	April 26 2016	Pennsylvania	189	NaN	NaN
41	April 26 2016	Rhode Island	24	NaN	NaN
42	May 3 2016	Indiana	83	NaN	NaN
43	May 7 2016	Guam	7	NaN	NaN
44	May 10 2016	West Virginia	29	NaN	NaN
45	May 14 2016	Democrats Abroad	13	NaN	NaN
46	May 17 2016	Kentucky	55	NaN	NaN
47	May 17 2016	Oregon	61	NaN	NaN
48	June 4 2016	Virgin Islands	7	NaN	NaN
49	June 5 2016	Puerto Rico	60	NaN	NaN
50	June 7 2016	California	475	NaN	NaN
51	June 7 2016	Montana	21	NaN	NaN
52	June 7 2016	New Jersey	126	NaN	NaN
53	June 7 2016	New Mexico	34	NaN	NaN
54	June 7 2016	South Dakota	20	NaN	NaN
55	June 7 2016	North Dakota	18	NaN	NaN
56	June 14 2016	District of Columbia	20	NaN	NaN