In [148]:
%matplotlib inline
import matplotlib.pyplot as plt
import requests
import time
import html5lib
import pandas as pd
import numpy as np
import pickle
import BeautifulSoup as soup
from ipy_progressbar import ProgressBar
from IPython.display import HTML
import os
import us
import json
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from matplotlib.gridspec import GridSpec
from mpl_toolkits.basemap import Basemap
from matplotlib.patches import Polygon
from scipy.stats.stats import pearsonr

In [29]:
# Build two module-level lookups used by the rest of the notebook:
#   abbrev_dict: upper-cased full state name -> abbreviation (from `us`)
#   st_states:   state abbreviation -> GeoJSON geometry dict
st_states = {}
fname = 'us-states.json'
if not os.path.isfile(fname):
    # Download the state outlines once and cache them in the working directory.
    print('Retreiving data')
    url = "https://raw.githubusercontent.com/alignedleft/d3-book/master/chapter_12/us-states.json"
    r = requests.get(url)
    # Fail loudly instead of caching an HTTP error page as the JSON file,
    # which would make every later json.load() of the cache fail.
    r.raise_for_status()
    with open(fname, 'w') as f:
        f.write(r.text)

abbrev_dict = {}
for state in us.states.STATES:
    abbrev_dict[state.name.upper()] = state.abbr

# Context manager so the cache file handle is always closed.
with open(fname, 'r') as f:
    state_json = json.load(f)

for feat in state_json['features']:
    st = feat['properties']['name'].upper()
    abbrev = abbrev_dict.get(st)
    geom = feat.get('geometry')
    # Skip features whose name is not a known state, or that carry no geometry.
    if abbrev is None or geom is None:
        continue
    st_states[abbrev] = geom

In [10]:
# Load the tab-separated county-level report table.
df = pd.read_csv('county_level_clean.csv', delimiter='\t')

In [123]:
# Total number of reports per state, with `state` as an ordinary column.
df2 = df.groupby(['state'])['reports'].sum().reset_index()

In [124]:
# Each state's share of all reports (the column sums to 1).
df2['c'] = df2['reports'].div(df2['reports'].sum())

In [79]:
def _polygons_of(geom):
    """Return a GeoJSON geometry as a list of polygons (each a list of rings).

    'Polygon' coordinates are wrapped in a one-element list so they have the
    same nesting as 'MultiPolygon' coordinates. Any other geometry type
    yields an empty list, so nothing is drawn for it.
    """
    kind = geom['type']
    if kind == 'Polygon':
        return [geom['coordinates']]
    if kind == 'MultiPolygon':
        return geom['coordinates']
    return []


def _alpha_for(value, max_value):
    """Scale `value` by `max_value` into matplotlib's valid alpha range [0, 1].

    Missing values and a non-positive maximum map to 0.0 (fully transparent).
    """
    if pd.isnull(value) or pd.isnull(max_value) or max_value <= 0:
        return 0.0
    # float() forces true division even for integer columns on Python 2.
    return min(max(float(value) / float(max_value), 0.0), 1.0)


def plot_map(df, col, title, state_col='state'):
    """Draw state outlines shaded by a column of `df`.

    Each row's state code is looked up (upper-cased) in the module-level
    `st_states` mapping of code -> GeoJSON geometry. The matching outline is
    filled in a fixed green whose opacity is the row's `col` value divided by
    the column maximum. Codes without a geometry are printed and skipped.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns `state_col` and `col`.
    col : str
        Name of the numeric column that drives the shading.
    title : str
        Figure title.
    state_col : str, optional
        Name of the column holding the state codes (default 'state').

    Returns
    -------
    None
        The figure is shown with `plt.show()`.
    """
    fig = plt.figure(figsize=(15, 20))
    ax = fig.add_axes([0.1, 0.1, 0.8, 0.8])
    lat_0 = 35.
    m = Basemap(projection='stere', lon_0=-100, lat_0=lat_0, lat_ts=lat_0,
                llcrnrlat=23, urcrnrlat=70,
                llcrnrlon=-130, urcrnrlon=-20.5,
                rsphere=6371200., resolution='l', area_thresh=10000)
    m.drawcoastlines()
    m.drawcountries()
    m.drawmapboundary()
    plt.title(title, fontsize=25)
    max_value = df[col].max()
    for ab in df[state_col]:
        # Only the geometry lookup is guarded, so unrelated KeyErrors surface.
        try:
            geom = st_states[ab.upper()]
        except KeyError:
            print(ab)
            continue
        # The opacity is the same for every ring of a state: compute it once.
        value = df.loc[df[state_col] == ab, col].iloc[0]
        alpha = _alpha_for(value, max_value)
        for polygon in _polygons_of(geom):
            for ring in polygon:
                # Real lists (not lazy map/zip objects) so this also works on
                # Python 3.
                lons = [point[0] for point in ring]
                lats = [point[1] for point in ring]
                x, y = m(lons, lats)
                poly = Polygon(list(zip(x, y)), facecolor='#88ff88',
                               edgecolor='black', alpha=alpha)
                ax.add_patch(poly)
    plt.show()

# Shade each state by its share of all reports.
plot_map(df2, col='c', title='Density of BFRO tracked bigfoot reports')


ca-al
ca-bc
ca-mn
ca-nb
ca-on
ca-qu
ca-sk
ca-yu

In [137]:
# Load the tab-separated population table.
df3 = pd.read_csv('pop_data.csv', delimiter='\t')

In [138]:
# Fill in missing abbreviations: keep the value already present in `abbrev`,
# otherwise look the upper-cased state name up in `abbrev_dict`, and fall back
# to '' when the name is unknown (or missing).
state_lookup = df3['state'].str.upper().map(abbrev_dict)
abbrevs = (
    df3['abbrev']
    .where(df3['abbrev'].notnull(), state_lookup)
    .fillna('')
    .tolist()
)

df3['abbrev'] = abbrevs

In [139]:
# Preview the first five rows of the population table.
df3.head(5)


Out[139]:
state pop_per_km_2 perc_pop abbrev cpop
0 New Jersey 1947.462334 0.027800 NJ 318900000
1 Rhode Island 1636.859714 0.003285 RI 318900000
2 Massachusetts 1380.813720 0.020907 MA 318900000
3 Connecticut 1195.095884 0.011234 CT 318900000
4 Maryland 982.823938 0.018521 MD 318900000

In [140]:
# Preview the first five rows of the per-state report totals.
df2.head(5)


Out[140]:
state reports c
0 AK 21 0.004152
1 AL 94 0.018584
2 AR 89 0.017596
3 AZ 82 0.016212
4 CA 425 0.084025

In [141]:
# Upper-case both join keys so they compare equal in the merge.
# The vectorised `.str` accessor keeps the values as text on Python 2 and 3;
# the previous `str(x).encode(...)` produced `bytes` on Python 3, which never
# equals the text keys in `df2['state']`.
df2['state'] = df2['state'].str.upper()
df3['abbrev'] = df3['abbrev'].astype(str).str.upper()

In [143]:
# Inner-join the population table with the per-state report totals.
# Both frames have a `state` column, so the result carries `state_x`/`state_y`.
df4 = pd.merge(df3, df2, how='inner', left_on='abbrev', right_on='state')

In [150]:
# Reports divided by population density for each row.
df4['den'] = df4['reports'] / df4['pop_per_km_2']
# print(...) with a single argument gives the same output on Python 2 and 3.
print(pearsonr(df4['reports'].tolist(), df4['pop_per_km_2'].tolist()))
plt.scatter(df4['reports'], df4['pop_per_km_2'])
# Label the axes so the figure can be read on its own.
plt.xlabel('reports')
plt.ylabel('pop_per_km_2')
plt.show()


(-0.011687619350311141, 0.93123780946382695)

In [151]:
# Rank rows by reports per unit of population density, highest first.
# `DataFrame.sort` was removed from pandas; `sort_values` is its replacement.
df4.sort_values('den', ascending=False)


Out[151]:
state_x pop_per_km_2 perc_pop abbrev cpop state_y reports c den
56 Yukon 0.100000 0.001000 CA-YU 35160000 CA-YU 5 0.000989 50.000000
51 British Columbia 4.800000 0.130000 CA-BC 35160000 CA-BC 130 0.025702 27.083333
53 Manitoba 2.200000 0.036000 CA-MN 35160000 CA-MN 31 0.006129 14.090909
48 Alaska 2.092142 0.002296 AK 318900000 AK 21 0.004152 10.037560
52 Alberta 5.700000 0.114000 CA-AL 35160000 CA-AL 38 0.007513 6.666667
49 Ontario 14.100000 0.385000 CA-ON 35160000 CA-ON 67 0.013246 4.751773
54 Saskatchewan 1.800000 0.032000 CA-SK 35160000 CA-SK 7 0.001384 3.888889
46 Montana 11.265380 0.003171 MT 318900000 MT 43 0.008501 3.817004
37 Oregon 65.822006 0.012277 OR 318900000 OR 240 0.047450 3.646197
23 Washington 168.819766 0.021778 WA 318900000 WA 594 0.117438 3.518545
47 Wyoming 9.656040 0.001820 WY 318900000 WY 28 0.005536 2.899739
42 Idaho 31.382130 0.005036 ID 318900000 ID 68 0.013444 2.166838
43 New Mexico 27.680648 0.006514 NM 318900000 NM 41 0.008106 1.481179
35 Colorado 81.754472 0.016458 CO 318900000 CO 120 0.023725 1.467810
24 Texas 162.865208 0.082620 TX 318900000 TX 201 0.039739 1.234149
39 Utah 56.809702 0.009062 UT 318900000 UT 68 0.013444 1.196979
10 California 396.058574 0.119745 CA 318900000 CA 425 0.084025 1.073074
33 Oklahoma 90.283974 0.012029 OK 318900000 OK 90 0.017794 0.996855
32 Arkansas 91.571446 0.009245 AR 318900000 AR 89 0.017596 0.971919
44 South Dakota 17.863674 0.002639 SD 318900000 SD 17 0.003361 0.951652
31 Arizona 93.824522 0.020700 AZ 318900000 AZ 82 0.016212 0.873972
26 Missouri 141.460986 0.018881 MO 318900000 MO 120 0.023725 0.848290
27 West Virginia 124.080114 0.005793 WV 318900000 WV 95 0.018782 0.765634
34 Iowa 88.996502 0.009654 IA 318900000 IA 66 0.013049 0.741602
11 Illinois 373.366880 0.040242 IL 318900000 IL 270 0.053381 0.723149
15 Michigan 281.634500 0.030912 MI 318900000 MI 201 0.039739 0.713691
38 Kansas 56.970636 0.009040 KS 318900000 KS 40 0.007908 0.702116
50 Quebec 5.800000 0.232000 CA-QU 35160000 CA-QU 4 0.000791 0.689655
55 New Brunswick 10.500000 0.022000 CA-NB 35160000 CA-NB 7 0.001384 0.666667
28 Minnesota 109.596054 0.016932 MN 318900000 MN 68 0.013444 0.620460
25 Alabama 153.531036 0.015100 AL 318900000 AL 94 0.018584 0.612254
20 Kentucky 179.119542 0.013730 KY 318900000 KY 101 0.019968 0.563869
9 Ohio 455.765088 0.036145 OH 318900000 OH 254 0.050217 0.557305
7 Florida 586.765364 0.061080 FL 318900000 FL 302 0.059707 0.514686
22 Wisconsin 170.590040 0.017939 WI 318900000 WI 83 0.016410 0.486547
16 Georgia 279.542358 0.031214 GA 318900000 GA 122 0.024120 0.436428
18 Tennessee 253.471050 0.020292 TN 318900000 TN 96 0.018980 0.378741
41 Nebraska 39.106962 0.005837 NE 318900000 NE 14 0.002768 0.357993
13 North Carolina 326.052284 0.030764 NC 318900000 NC 88 0.017398 0.269895
14 Indiana 295.152956 0.020526 IN 318900000 IN 76 0.015026 0.257494
8 Pennsylvania 459.466570 0.039903 PA 318900000 PA 109 0.021550 0.237232
45 North Dakota 16.898070 0.002260 ND 318900000 ND 4 0.000791 0.236713
21 Louisiana 172.360314 0.014449 LA 318900000 LA 40 0.007908 0.232072
30 Mississippi 102.514958 0.009344 MS 318900000 MS 22 0.004350 0.214603
12 Virginia 336.673928 0.025804 VA 318900000 VA 71 0.014037 0.210887
40 Nevada 40.877236 0.008716 NV 318900000 NV 8 0.001582 0.195708
17 South Carolina 255.563192 0.014916 SC 318900000 SC 50 0.009885 0.195646
6 New York 671.094780 0.061387 NY 318900000 NY 100 0.019771 0.149010
36 Maine 69.362554 0.004149 ME 318900000 ME 10 0.001977 0.144170
29 Vermont 109.435120 0.001957 VT 318900000 VT 6 0.001186 0.054827
19 New Hampshire 237.860452 0.004134 NH 318900000 NH 10 0.001977 0.042041
4 Maryland 982.823938 0.018521 MD 318900000 MD 34 0.006722 0.034594
0 New Jersey 1947.462334 0.027800 NJ 318900000 NJ 49 0.009688 0.025161
2 Massachusetts 1380.813720 0.020907 MA 318900000 MA 21 0.004152 0.015208
3 Connecticut 1195.095884 0.011234 CT 318900000 CT 8 0.001582 0.006694
5 Delaware 764.597434 0.002892 DE 318900000 DE 5 0.000989 0.006539
1 Rhode Island 1636.859714 0.003285 RI 318900000 RI 5 0.000989 0.003055

In [ ]:
# After the merge `df4` has `state_x` (full name) and `state_y` (code) but no
# `state` column, which `plot_map` reads; expose the code column under that name.
plot_map(df4.rename(columns={'state_y': 'state'}), 'den',
         'BFRO reported sightings by population density')

In [152]:
# Count the rows whose `cpop` equals 318900000 (the value carried by the
# US-state rows in the table shown above).
(df4['cpop'] == 318900000).sum()


Out[152]:
49

In [142]:
# Display the full merged table.
# NOTE(review): the recorded output has no `den` column although an earlier
# cell adds it, so this cell appears to have been run out of order; a fresh
# top-to-bottom run would show `den` as well.
df4


Out[142]:
state_x pop_per_km_2 perc_pop abbrev cpop state_y reports c
0 New Jersey 1947.462334 0.027800 NJ 318900000 NJ 49 0.009688
1 Rhode Island 1636.859714 0.003285 RI 318900000 RI 5 0.000989
2 Massachusetts 1380.813720 0.020907 MA 318900000 MA 21 0.004152
3 Connecticut 1195.095884 0.011234 CT 318900000 CT 8 0.001582
4 Maryland 982.823938 0.018521 MD 318900000 MD 34 0.006722
5 Delaware 764.597434 0.002892 DE 318900000 DE 5 0.000989
6 New York 671.094780 0.061387 NY 318900000 NY 100 0.019771
7 Florida 586.765364 0.061080 FL 318900000 FL 302 0.059707
8 Pennsylvania 459.466570 0.039903 PA 318900000 PA 109 0.021550
9 Ohio 455.765088 0.036145 OH 318900000 OH 254 0.050217
10 California 396.058574 0.119745 CA 318900000 CA 425 0.084025
11 Illinois 373.366880 0.040242 IL 318900000 IL 270 0.053381
12 Virginia 336.673928 0.025804 VA 318900000 VA 71 0.014037
13 North Carolina 326.052284 0.030764 NC 318900000 NC 88 0.017398
14 Indiana 295.152956 0.020526 IN 318900000 IN 76 0.015026
15 Michigan 281.634500 0.030912 MI 318900000 MI 201 0.039739
16 Georgia 279.542358 0.031214 GA 318900000 GA 122 0.024120
17 South Carolina 255.563192 0.014916 SC 318900000 SC 50 0.009885
18 Tennessee 253.471050 0.020292 TN 318900000 TN 96 0.018980
19 New Hampshire 237.860452 0.004134 NH 318900000 NH 10 0.001977
20 Kentucky 179.119542 0.013730 KY 318900000 KY 101 0.019968
21 Louisiana 172.360314 0.014449 LA 318900000 LA 40 0.007908
22 Wisconsin 170.590040 0.017939 WI 318900000 WI 83 0.016410
23 Washington 168.819766 0.021778 WA 318900000 WA 594 0.117438
24 Texas 162.865208 0.082620 TX 318900000 TX 201 0.039739
25 Alabama 153.531036 0.015100 AL 318900000 AL 94 0.018584
26 Missouri 141.460986 0.018881 MO 318900000 MO 120 0.023725
27 West Virginia 124.080114 0.005793 WV 318900000 WV 95 0.018782
28 Minnesota 109.596054 0.016932 MN 318900000 MN 68 0.013444
29 Vermont 109.435120 0.001957 VT 318900000 VT 6 0.001186
30 Mississippi 102.514958 0.009344 MS 318900000 MS 22 0.004350
31 Arizona 93.824522 0.020700 AZ 318900000 AZ 82 0.016212
32 Arkansas 91.571446 0.009245 AR 318900000 AR 89 0.017596
33 Oklahoma 90.283974 0.012029 OK 318900000 OK 90 0.017794
34 Iowa 88.996502 0.009654 IA 318900000 IA 66 0.013049
35 Colorado 81.754472 0.016458 CO 318900000 CO 120 0.023725
36 Maine 69.362554 0.004149 ME 318900000 ME 10 0.001977
37 Oregon 65.822006 0.012277 OR 318900000 OR 240 0.047450
38 Kansas 56.970636 0.009040 KS 318900000 KS 40 0.007908
39 Utah 56.809702 0.009062 UT 318900000 UT 68 0.013444
40 Nevada 40.877236 0.008716 NV 318900000 NV 8 0.001582
41 Nebraska 39.106962 0.005837 NE 318900000 NE 14 0.002768
42 Idaho 31.382130 0.005036 ID 318900000 ID 68 0.013444
43 New Mexico 27.680648 0.006514 NM 318900000 NM 41 0.008106
44 South Dakota 17.863674 0.002639 SD 318900000 SD 17 0.003361
45 North Dakota 16.898070 0.002260 ND 318900000 ND 4 0.000791
46 Montana 11.265380 0.003171 MT 318900000 MT 43 0.008501
47 Wyoming 9.656040 0.001820 WY 318900000 WY 28 0.005536
48 Alaska 2.092142 0.002296 AK 318900000 AK 21 0.004152
49 Ontario 14.100000 0.385000 CA-ON 35160000 CA-ON 67 0.013246
50 Quebec 5.800000 0.232000 CA-QU 35160000 CA-QU 4 0.000791
51 British Columbia 4.800000 0.130000 CA-BC 35160000 CA-BC 130 0.025702
52 Alberta 5.700000 0.114000 CA-AL 35160000 CA-AL 38 0.007513
53 Manitoba 2.200000 0.036000 CA-MN 35160000 CA-MN 31 0.006129
54 Saskatchewan 1.800000 0.032000 CA-SK 35160000 CA-SK 7 0.001384
55 New Brunswick 10.500000 0.022000 CA-NB 35160000 CA-NB 7 0.001384
56 Yukon 0.100000 0.001000 CA-YU 35160000 CA-YU 5 0.000989