notebook.community

Edit and run



In [23]:

    
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression as LR
%matplotlib inline



In [24]:

    
# Load the per-region table from a CSV in the working directory. Later cells
# read its 'Region', 'EC votes', '2010 Census', 'Rural population' and
# 'Urban population' columns.
data = pd.read_csv('data.csv')



In [25]:

    
# Show the full table as this cell's output (one row per region).
data









    Out[25]:







  
    
      
      Region
      EC votes
      2016 Estimate
      2010 Census
      Rural population
      Urban population
    
  
  
    
      0
      Alabama
      9
      4863300
      4779736
      1957932
      2821804
    
    
      1
      Alaska
      3
      741894
      710231
      241338
      468893
    
    
      2
      Arizona
      11
      6931071
      6392017
      651358
      5740659
    
    
      3
      Arkansas
      6
      2988248
      2915918
      1278329
      1637589
    
    
      4
      California
      55
      39250017
      37253956
      1880350
      35373606
    
    
      5
      Colorado
      9
      5540545
      5029196
      696435
      4332761
    
    
      6
      Connecticut
      7
      3576452
      3574097
      429155
      3144942
    
    
      7
      Delaware
      3
      952065
      897934
      149985
      747949
    
    
      8
      District of Columbia
      3
      681170
      601723
      0
      601723
    
    
      9
      Florida
      29
      20612439
      18801310
      1661466
      17139844
    
    
      10
      Georgia
      16
      10310371
      9687653
      2415502
      7272151
    
    
      11
      Hawaii
      4
      1428557
      1360301
      109812
      1250489
    
    
      12
      Idaho
      4
      1683140
      1567582
      461212
      1106370
    
    
      13
      Illinois
      20
      12801539
      12830632
      1477079
      11353553
    
    
      14
      Indiana
      11
      6633053
      6483802
      1786702
      4697100
    
    
      15
      Iowa
      6
      3134693
      3046355
      1096099
      1950256
    
    
      16
      Kansas
      6
      2907289
      2853118
      736157
      2116961
    
    
      17
      Kentucky
      8
      4436974
      4339367
      1806024
      2533343
    
    
      18
      Louisiana
      8
      4681666
      4533372
      1215567
      3317805
    
    
      19
      Maine
      4
      1331479
      1328361
      814819
      513542
    
    
      20
      Maryland
      10
      6016447
      5773552
      739221
      5034331
    
    
      21
      Massachusetts
      11
      6811779
      6547629
      525640
      6021989
    
    
      22
      Michigan
      16
      9928301
      9883640
      2513683
      7369957
    
    
      23
      Minnesota
      10
      5519952
      5303925
      1417614
      3886311
    
    
      24
      Mississippi
      6
      2988726
      2967297
      1503073
      1464224
    
    
      25
      Missouri
      10
      6093000
      5988927
      1770556
      4218371
    
    
      26
      Montana
      3
      1042520
      989415
      436401
      553014
    
    
      27
      Nebraska
      5
      1907116
      1826341
      490655
      1335686
    
    
      28
      Nevada
      6
      2940058
      2700551
      156754
      2543797
    
    
      29
      New Hampshire
      4
      1334795
      1316470
      522598
      793872
    
    
      30
      New Jersey
      14
      8944469
      8791894
      467768
      8324126
    
    
      31
      New Mexico
      5
      2081015
      2059179
      464818
      1594361
    
    
      32
      New York
      29
      19745289
      19378102
      2349997
      17028105
    
    
      33
      North Carolina
      15
      10146788
      9535483
      3233727
      6301756
    
    
      34
      North Dakota
      3
      757952
      672591
      269719
      402872
    
    
      35
      Ohio
      18
      11614373
      11536504
      2546810
      8989694
    
    
      36
      Oklahoma
      7
      3923561
      3751351
      1266322
      2485029
    
    
      37
      Oregon
      7
      4093465
      3831074
      726692
      3104382
    
    
      38
      Pennsylvania
      20
      12802503
      12702379
      2711092
      9991287
    
    
      39
      Rhode Island
      4
      1056426
      1052567
      97524
      955043
    
    
      40
      South Carolina
      9
      4961119
      4625364
      1557555
      3067809
    
    
      41
      South Dakota
      3
      865454
      814180
      352933
      461247
    
    
      42
      Tennessee
      11
      6651194
      6346105
      2132860
      4213245
    
    
      43
      Texas
      38
      27862596
      25145561
      3847522
      21298039
    
    
      44
      Utah
      6
      3051217
      2763885
      260290
      2503595
    
    
      45
      Vermont
      3
      624594
      625741
      382356
      243385
    
    
      46
      Virginia
      13
      8411808
      8001024
      1963930
      6037094
    
    
      47
      Washington
      12
      7288000
      6724540
      1072671
      5651869
    
    
      48
      West Virginia
      5
      1831102
      1852994
      950184
      902810
    
    
      49
      Wisconsin
      10
      5778708
      5686986
      1697348
      3989638
    
    
      50
      Wyoming
      3
      585501
      563626
      198633
      364993



In [64]:

    
# Regions that get a text label on the scatter plots, each paired with the
# abbreviation that is drawn next to its point.
label_states, names = map(list, zip(
    ('California', 'CA'),
    ('New York', 'NY'),
    ('District of Columbia', 'DC'),
    ('Wyoming', 'WY'),
    ('Michigan', 'MI'),
))

# Opacity used for the background scatter points in every figure.
alpha = 0.4



In [65]:

    
# EC votes per person for every region, plotted against total, rural and
# urban population (one panel each, shared y axis). Horizontal lines mark the
# national average and the rural- / urban-population-weighted averages.
f, axes = plt.subplots(1, 3, figsize=(15, 5), sharey=True)
ax0, ax1, ax2 = axes

# Per-region voting weight: electoral votes per resident (2010 Census).
y = data['EC votes'] / data['2010 Census']

# National average, plus the average weight experienced by a rural / urban
# resident (each region's weight is weighted by how many such residents it has).
avg = data['EC votes'].sum() / data['2010 Census'].sum()
rural = np.dot(data['Rural population'], y) / data['Rural population'].sum()
urban = np.dot(data['Urban population'], y) / data['Urban population'].sum()

keys = ['2010 Census', 'Rural population', 'Urban population']
colors = ['#1f77b4', '#ff7f0e', '#2ca02c']
xlabels = ['Population', 'Rural population', 'Urban population']
for ax, key, c, xlabel in zip(axes, keys, colors, xlabels):
    x = data[key]
    ax.scatter(x, y, c=c, alpha=alpha)
    ax.grid()
    # Reference lines reuse the panel colours: blue = total, orange = rural,
    # green = urban. The legend labels must follow the plotted quantity.
    ax.plot([x.min(), x.max()], [avg, avg], c='#1f77b4', label='Total')
    ax.plot([x.min(), x.max()], [rural, rural], c='#ff7f0e', label='Rural')
    ax.plot([x.min(), x.max()], [urban, urban], c='#2ca02c', label='Urban')
    ax.set_xlabel(xlabel)
    ax.set_ylabel('EC votes per person')
    ax.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
    ax.ticklabel_format(style='sci', axis='x', scilimits=(0, 0))
    for st, n in zip(label_states, names):
        # argmax over the boolean mask gives the row *position* of the first
        # match, so look the point up positionally with .iloc.
        # NOTE: argmax returns 0 when nothing matches, so a misspelled region
        # name would silently label the first row.
        idx = np.argmax((data['Region'] == st).values)
        xp, yp = x.iloc[idx], y.iloc[idx]
        ax.annotate(n, (xp, yp),
                    xytext=(5, -5), textcoords='offset points',
                    rotation=-45)
        # Re-draw the labelled point without transparency so it stands out.
        ax.scatter(xp, yp, c=c)

ax0.set_ylim(0, 1.1 * y.max())
ax0.legend(loc='best')

plt.savefig('ec_per_person.png')
plt.draw()



In [126]:

    
# How many times more EC votes per person the District of Columbia has than
# California.
# Recomputed from `data` and looked up by region name: the global `y` is
# rebound to other arrays by the later plotting cells, and 8 / 4 were bare
# row positions (District of Columbia / California in the table above).
by_region = data.set_index('Region')
votes_per_person = by_region['EC votes'] / by_region['2010 Census']
votes_per_person['District of Columbia'] / votes_per_person['California']









    Out[126]:





3.3770255809340237



In [41]:

    
# Regions labelled on the EC-votes-vs-population figure, each paired with the
# abbreviation that is drawn next to its point.
label_states2, names2 = map(list, zip(
    ('California', 'CA'),
    ('New York', 'NY'),
    ('Wyoming', 'WY'),
    ('Michigan', 'MI'),
))



In [56]:

    
# EC votes against population, with each region's gap to strictly
# proportional representation drawn as a thin red vertical segment.
# Left panel: every region. Right panel: only the `plot_first` least-populous
# regions, which zooms in on the crowded low-population end.
f, axes = plt.subplots(1, 2, figsize=(10, 5), sharey=False)

plot_first = 45
# Sort all three columns by population so a prefix slice means "smallest N".
idxs = np.argsort(data['2010 Census'].values)
x = data['2010 Census'].values[idxs]
y = data['EC votes'].values[idxs]
regions = data['Region'].values[idxs]

# One (axes, number of regions shown) pair per panel. Slicing per panel keeps
# x / y intact instead of truncating them at the end of each loop pass.
for ax, n_shown in zip(axes, (len(x), plot_first)):
    xs, ys, shown = x[:n_shown], y[:n_shown], regions[:n_shown]

    ax.scatter(xs, ys, c='#1f77b4', alpha=alpha)
    ax.grid()
    # `avg` is the nationwide EC-votes-per-person figure from the per-person
    # plot cell; population * avg is the strictly proportional allocation.
    ax.plot([xs.min(), xs.max()], [xs.min() * avg, xs.max() * avg], c='#1f77b4',
            label='Proportional representation')
    for xp, yp in zip(xs, ys):
        ax.plot([xp, xp], [yp, xp * avg], c='red', lw=.5)
    ax.set_xlabel('Population')
    ax.set_ylabel('EC votes')
    ax.ticklabel_format(style='sci', axis='x', scilimits=(0, 0))

    for st, n in zip(label_states2, names2):
        hits = np.flatnonzero(shown == st)
        if hits.size == 0:
            # Region is not among the points drawn on this panel (or the name
            # matches no row): there is nothing to label.
            continue
        idx = hits[0]
        xp, yp = xs[idx], ys[idx]
        # Push the label past the residual segment, away from the
        # proportional line: the residual value plus 10, applied as a
        # vertical offset in points.
        residual = yp - (avg * xp)
        yloc = residual + 10 * np.sign(residual)
        # Kept from the original cell: echoes each label offset.
        print(yloc)
        ax.annotate(n, (xp, yp),
                    xytext=(0, yloc), textcoords='offset points',
                    rotation=0, ha='center', va='center')
        # Re-draw the labelled point without transparency so it stands out.
        ax.scatter(xp, yp, c='#1f77b4')

axes[0].set_xticks(np.arange(0, 5e7, 1e7))
axes[0].set_yticks(np.arange(0, 70, 10))

axes[1].set_xticks(np.arange(0, 1.5e7, .25e7))
axes[1].set_yticks(np.arange(0, 25, 5))

axes[0].legend(loc='best')
plt.savefig('ec_per_state.png')
plt.show()









    



-19.9163335536
-14.7670268647
12.0178617966
-11.2225916347
12.0178617966
-11.2225916347



In [63]:

    
# Fraction of each region's population that is rural, against total
# population, with a least-squares linear trend line.
f, ax = plt.subplots(1, figsize=(5, 5))
x = data['2010 Census'].values
idxs = np.argsort(x)
y = data['Rural population'].values / x
# Sort population, rural fraction and region names with the same permutation
# so positions stay aligned across the three arrays.
x = x[idxs]
y = y[idxs]
regions = data['Region'].values[idxs]

model = LR()
model.fit(x[:, np.newaxis], y[:, np.newaxis])
# predict() needs a 2-D (n_samples, n_features) array; a bare scalar is
# rejected by current scikit-learn. Predict both line endpoints in one call.
x_ends = np.array([[x.min()], [x.max()]])
y_ends = model.predict(x_ends).ravel()
ax.plot(x_ends.ravel(), y_ends, label='Fit', c='#1f77b4')

ax.scatter(x, y, c='#1f77b4', alpha=alpha)
ax.grid()
ax.set_xlabel('Population')
ax.set_ylabel('Fraction Rural')
ax.ticklabel_format(style='sci', axis='x', scilimits=(0, 0))
ax.set_xticks(np.arange(0, 5e7, 1e7))
ax.set_yticks(np.arange(0, .8, .2))
for st, n in zip(label_states, names):
    # Position of the region within the population-sorted arrays.
    idx = np.argmax(regions == st)
    ax.annotate(n, (x[idx], y[idx]),
                xytext=(0, 10), textcoords='offset points',
                rotation=0, ha='center', va='center')
    # Re-draw the labelled point without transparency so it stands out.
    ax.scatter(x[idx], y[idx], c='#1f77b4')

ax.legend(loc='best')
f.tight_layout()

plt.savefig('pop_vs_rural.png')
plt.show()



In [113]:

    
# Sanity check of the loaded table: column names, then the national totals
# of electoral votes and of 2010 Census population.
column_names = data.columns
print(column_names)

total_ec_votes = data['EC votes'].sum()
print(total_ec_votes)

total_population = data['2010 Census'].sum()
print(total_population)









    



Index(['Region', 'EC votes', '2016 Estimate', '2010 Census',
       'Rural population', 'Urban population'],
      dtype='object')
538
308745538

	Region	EC votes	2016 Estimate	2010 Census	Rural population	Urban population
0	Alabama	9	4863300	4779736	1957932	2821804
1	Alaska	3	741894	710231	241338	468893
2	Arizona	11	6931071	6392017	651358	5740659
3	Arkansas	6	2988248	2915918	1278329	1637589
4	California	55	39250017	37253956	1880350	35373606
5	Colorado	9	5540545	5029196	696435	4332761
6	Connecticut	7	3576452	3574097	429155	3144942
7	Delaware	3	952065	897934	149985	747949
8	District of Columbia	3	681170	601723	0	601723
9	Florida	29	20612439	18801310	1661466	17139844
10	Georgia	16	10310371	9687653	2415502	7272151
11	Hawaii	4	1428557	1360301	109812	1250489
12	Idaho	4	1683140	1567582	461212	1106370
13	Illinois	20	12801539	12830632	1477079	11353553
14	Indiana	11	6633053	6483802	1786702	4697100
15	Iowa	6	3134693	3046355	1096099	1950256
16	Kansas	6	2907289	2853118	736157	2116961
17	Kentucky	8	4436974	4339367	1806024	2533343
18	Louisiana	8	4681666	4533372	1215567	3317805
19	Maine	4	1331479	1328361	814819	513542
20	Maryland	10	6016447	5773552	739221	5034331
21	Massachusetts	11	6811779	6547629	525640	6021989
22	Michigan	16	9928301	9883640	2513683	7369957
23	Minnesota	10	5519952	5303925	1417614	3886311
24	Mississippi	6	2988726	2967297	1503073	1464224
25	Missouri	10	6093000	5988927	1770556	4218371
26	Montana	3	1042520	989415	436401	553014
27	Nebraska	5	1907116	1826341	490655	1335686
28	Nevada	6	2940058	2700551	156754	2543797
29	New Hampshire	4	1334795	1316470	522598	793872
30	New Jersey	14	8944469	8791894	467768	8324126
31	New Mexico	5	2081015	2059179	464818	1594361
32	New York	29	19745289	19378102	2349997	17028105
33	North Carolina	15	10146788	9535483	3233727	6301756
34	North Dakota	3	757952	672591	269719	402872
35	Ohio	18	11614373	11536504	2546810	8989694
36	Oklahoma	7	3923561	3751351	1266322	2485029
37	Oregon	7	4093465	3831074	726692	3104382
38	Pennsylvania	20	12802503	12702379	2711092	9991287
39	Rhode Island	4	1056426	1052567	97524	955043
40	South Carolina	9	4961119	4625364	1557555	3067809
41	South Dakota	3	865454	814180	352933	461247
42	Tennessee	11	6651194	6346105	2132860	4213245
43	Texas	38	27862596	25145561	3847522	21298039
44	Utah	6	3051217	2763885	260290	2503595
45	Vermont	3	624594	625741	382356	243385
46	Virginia	13	8411808	8001024	1963930	6037094
47	Washington	12	7288000	6724540	1072671	5651869
48	West Virginia	5	1831102	1852994	950184	902810
49	Wisconsin	10	5778708	5686986	1697348	3989638
50	Wyoming	3	585501	563626	198633	364993