In [148]:
%matplotlib inline
import matplotlib.pyplot as plt
import requests
import time
import html5lib
import pandas as pd
import numpy as np
import pickle
from bs4 import BeautifulSoup as soup
from ipy_progressbar import ProgressBar
from IPython.display import HTML
import os
import us
import json
import matplotlib as mpl
import matplotlib.patches as mpatches
from matplotlib.gridspec import GridSpec
from mpl_toolkits.basemap import Basemap
from matplotlib.patches import Polygon
from scipy.stats import pearsonr
In [29]:
st_states = {}
fname = 'us-states.json'
if not os.path.isfile(fname):
    print('Retrieving data')
    url = "https://raw.githubusercontent.com/alignedleft/d3-book/master/chapter_12/us-states.json"
    r = requests.get(url)
    with open(fname, 'w') as f:
        f.write(r.text)
abbrev_dict = {}
state_json = json.load(open(fname, 'r'))
# Map upper-cased full state names to their two-letter abbreviations
for state in us.states.STATES:
    abbrev_dict[state.name.upper()] = state.abbr
# Keep the GeoJSON geometry for every state we can resolve to an abbreviation
for feat in state_json['features']:
    st = feat['properties']['name'].upper()
    try:
        abbrev = abbrev_dict[st]
        st_states[abbrev] = feat['geometry']
    except KeyError:
        continue
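As an aside, the us package can also resolve full state names directly, which makes the hand-built abbrev_dict optional. A minimal sketch, assuming us.states.lookup behaves as in recent releases of the package (note it also matches DC and the territories, unlike us.states.STATES):

# Equivalent lookup using the us package directly (sketch, not run above)
for feat in state_json['features']:
    match = us.states.lookup(feat['properties']['name'])
    if match is not None:
        st_states[match.abbr] = feat['geometry']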
In [10]:
df = pd.read_csv('county_level_clean.csv', sep='\t')
In [123]:
# Total report count per state
df2 = pd.DataFrame(df.groupby(['state'])['reports'].sum())
df2.reset_index(inplace=True)
In [124]:
df2['c'] = df2['reports'] / df2['reports'].sum()  # each state's share of all reports
In [79]:
def plot_map(df, col, title):
    fig = plt.figure(figsize=(15, 20))
    ax = fig.add_axes([0.1, 0.1, 0.8, 0.8])
    lat_0 = 35
    m = Basemap(projection='stere', lon_0=-100, lat_0=35., lat_ts=lat_0,
                llcrnrlat=23, urcrnrlat=70,
                llcrnrlon=-130, urcrnrlon=-20.5,
                rsphere=6371200., resolution='l', area_thresh=10000)
    m.drawcoastlines()
    m.drawcountries()
    m.drawmapboundary()
    plt.title(title, fontsize=25)
    mm = max(df[col])
    for ab in df['state']:
        try:
            geom = st_states[ab.upper()]
            # Normalize single polygons to the MultiPolygon nesting
            tp = geom['type']
            if tp == 'Polygon':
                coords = [geom['coordinates']]
            elif tp == 'MultiPolygon':
                coords = geom['coordinates']
            for shapel in coords:
                for shape in shapel:
                    lons = [pt[0] for pt in shape]
                    lats = [pt[1] for pt in shape]
                    x, y = m(lons, lats)
                    # Shade each state by its value relative to the maximum
                    a = df.loc[df.state == ab, col].iloc[0] / mm
                    poly = Polygon(list(zip(x, y)), facecolor='#88ff88',
                                   edgecolor='black', alpha=a)
                    ax.add_patch(poly)
        except KeyError:
            print(ab)
    plt.show()
plot_map(df2, 'c', 'Density of BFRO tracked bigfoot reports')
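plot_map encodes each state's value by varying the alpha of a fixed green fill. A different shading scheme, shown only as a sketch and not what plot_map does above, is to run each normalized value through a sequential colormap and use the result as the facecolor:

# Sketch: precompute a hex facecolor per state from a sequential colormap
norm = mpl.colors.Normalize(vmin=0, vmax=df2['c'].max())
state_colors = {row.state: mpl.colors.rgb2hex(plt.cm.Greens(norm(row.c)))
                for row in df2.itertuples()}

Inside plot_map, the Polygon call would then take facecolor=state_colors[ab] and drop the alpha argument, which keeps the state borders fully opaque.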
In [137]:
df3 = pd.read_csv('pop_data.csv', sep='\t')
In [138]:
# Note: this row-by-row loop is a sloppy way to fill in the missing
# abbreviations; don't copy it (a vectorized alternative follows this cell)
abbrevs = []
for i in range(df3.shape[0]):
    row = df3.iloc[i]
    if isinstance(row.abbrev, str):
        abbrevs.append(row.abbrev)
    else:
        try:
            abbrevs.append(abbrev_dict[row.state.upper()])
        except KeyError:
            abbrevs.append('')
df3['abbrev'] = abbrevs
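The same fill can be done in one pass with pandas. A sketch equivalent to the loop above, assuming the state column of df3 holds full state names as before:

# Keep rows that already have a string abbreviation; otherwise look the
# upper-cased state name up in abbrev_dict, defaulting to ''
is_str = df3['abbrev'].apply(lambda x: isinstance(x, str))
filled = df3['state'].str.upper().map(abbrev_dict).fillna('')
df3['abbrev'] = df3['abbrev'].where(is_str, filled)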
In [139]:
df3.head()
Out[139]:
In [140]:
df2.head()
Out[140]:
In [141]:
df2['state'] = df2['state'].apply(str.upper)
df3['abbrev'] = df3['abbrev'].astype(str).str.upper()
In [143]:
df4 = df3.merge(df2, left_on='abbrev', right_on='state')
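One thing worth checking after this cell, though it is not part of the original: an inner merge silently drops any state that appears in only one of the two tables.

# Abbreviations present in only one of the two tables (dropped by the merge)
unmatched = set(df3['abbrev']).symmetric_difference(set(df2['state']))
print(sorted(unmatched))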
In [150]:
df4['den'] = df4['reports'] / df4['pop_per_km_2']
print(pearsonr(df4['reports'].tolist(), df4['pop_per_km_2'].tolist()))
plt.scatter(df4['reports'], df4['pop_per_km_2'])
plt.show()
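Both reports and population density are likely right-skewed, so Pearson's r can be driven by a handful of extreme states. A rank correlation is a cheap companion check; this is a sketch added here, not part of the original analysis:

# Spearman rank correlation as a skew-robust companion to Pearson's r
from scipy.stats import spearmanr
print(spearmanr(df4['reports'], df4['pop_per_km_2']))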
In [151]:
df4.sort_values('den', ascending=False)
Out[151]:
In [ ]:
plot_map(df4, 'den', 'BFRO reported sightings by population density')
In [152]:
sum(df4['cpop']==318900000)
Out[152]:
In [142]:
df4
Out[142]: