In [11]:
# Data handling, mapping and notebook-display dependencies.
import pandas as pd
import numpy as np
import folium
from IPython.display import HTML
# IPython magic: render matplotlib figures inside the notebook output.
%matplotlib inline
import matplotlib.pyplot as plt
# Apply the 'ggplot' stylesheet to every figure created afterwards.
plt.style.use('ggplot')
In [12]:
# Load the election table plus the two Twitter follower tables.
df = pd.read_csv('../data/governors-challengers.csv')

# Challenger accounts: the file is read as UTF-16 with read_table's default
# (tab) delimiter. Columns are renamed right away so they carry distinct
# names ('twch' / 'folch') when merged into `df` later.
ch = pd.read_table(
    '../data/tw_ch.csv',
    usecols=['screen_name', 'followers_count'],
    encoding='utf-16',
).rename(columns={'screen_name': 'twch', 'followers_count': 'folch'})

# Governor accounts: same treatment, renamed to 'twgov' / 'folgov'.
gov = pd.read_table(
    '../data/tw_gov.csv',
    usecols=['screen_name', 'followers'],
).rename(columns={'screen_name': 'twgov', 'followers': 'folgov'})
In [24]:
# Choropleth map of incumbent governors' vote shares (%).
# `mapname` is the base name for both the exported JSON and the HTML page;
# `state_geo` is reused by the map cells further down.
mapname = 'gov_share'
state_geo = 'us_states.geojson'
states = folium.Map(location=[40, -99], zoom_start=4)
states.geo_json(geo_path=state_geo, data=df, data_out=mapname+'.json',
                columns=['state', 'shareGov'],
                threshold_scale=[45, 50, 55, 60, 65, 70],
                key_on='feature.properties.name',
                fill_color='PuRd', fill_opacity=0.7, line_opacity=0.2,
                legend_name='Incumbent Governor Vote Share (%)')
states.create_map(path=mapname+'.html')
# Quote the src attribute (as the other map cells do) so the iframe markup
# stays well-formed regardless of the characters in `mapname`.
HTML('<iframe src="'+mapname+'.html" style="width: 100%; height: 500px; border: none"></iframe>')
Out[24]:
In [16]:
# Attach governor and challenger follower counts to the election table.
# Both merges rely on pandas' default of joining on the shared column names.
df = df.merge(gov).merge(ch)

# Governor's share of Twitter followers (only considering the primary challenger).
followers_total = df['folgov'] + df['folch']
df['twshare'] = 100 * df['folgov'] / followers_total

# Re-express the governor's vote share relative to governor + challenger only.
# NOTE: this overwrites 'shareGov' in place, so running the cell a second
# time normalizes the already-normalized values again.
votes_total = df['shareGov'] + df['shareCh']
df['shareGov'] = 100 * df['shareGov'] / votes_total

# Absolute gap between the Twitter share and the (normalized) vote share.
df['shareDiff'] = (df['twshare'] - df['shareGov']).abs()
df
Out[16]:
In [23]:
# Choropleth map of incumbent governors' Twitter follower share (%).
# Relies on `state_geo` having been defined by the vote-share map cell.
states = folium.Map(location=[40, -99], zoom_start=4)
twshare_layer = dict(
    geo_path=state_geo,
    data=df,
    data_out='govtw_share.json',
    columns=['state', 'twshare'],
    threshold_scale=[50, 60, 70, 80, 90, 99],
    key_on='feature.properties.name',
    fill_color='PuRd',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Incumbent Governors Twitter Follower Share (%)',
)
states.geo_json(**twshare_layer)
states.create_map(path='govtw_share.html')
# Embed the generated page in the notebook output.
HTML('<iframe src="govtw_share.html" style="width: 100%; height: 510px; border: none"></iframe>')
Out[23]:
In [71]:
# Choropleth map of |vote share - Twitter follower share| per state.
# Relies on `state_geo` having been defined by the vote-share map cell.
mapname = 'share_diff'
json_out = mapname+'.json'
html_out = mapname+'.html'

states = folium.Map(location=[40, -99], zoom_start=4)
states.geo_json(
    geo_path=state_geo,
    data=df,
    data_out=json_out,
    columns=['state', 'shareDiff'],
    threshold_scale=[5, 10, 15, 20, 30, 40],
    key_on='feature.properties.name',
    fill_color='YlGnBu',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='The Difference Between Governors Vote Share & Twitter Follower Share (%)',
)
states.create_map(path=html_out)
# Embed the generated page in the notebook output.
HTML('<iframe src="'+html_out+'" style="width: 100%; height: 510px; border: none"></iframe>')
Out[71]:
In [7]:
# Line plot of Twitter share vs. vote share, one x position per state.
# DataFrame.plot documents `x` as a column label or position, so the state
# names are supplied by indexing the frame on 'state' rather than by passing
# a Series as `x=` (which current pandas does not accept).
ax = (df.set_index('state')[['twshare', 'shareGov']]
        .plot(xticks=range(len(df)), rot=75, figsize=(15, 3)))
# Legend entries follow the column order selected above.
ax.legend(['Twitter share', 'Vote share'], loc='best');
In [5]:
# Correlation matrix of the governors' Twitter follower share and their
# 'normalized' vote share.
share_cols = ['twshare', 'shareGov']
df[share_cols].corr()
Out[5]:
In [28]:
# Scatter of Twitter follower share (x) against normalized vote share (y),
# with every point labelled by its state.
ax = df.plot(x='twshare', y='shareGov', kind='scatter', figsize=(15, 15),
             xlim=(0, 100), ylim=(50, 80))

# Label each point; a plain loop replaces DataFrame.apply, which was only
# being used for its side effect.
for state_name, tw_share, vote_share in zip(df['state'], df['twshare'], df['shareGov']):
    ax.annotate(state_name, (tw_share, vote_share),
                xytext=(-40, 7), textcoords='offset points', fontsize=14)

# The axis labels must match the plotted columns: x is 'twshare', y is 'shareGov'.
ax.set_xlabel("Normalized Twitter Follower Share of Sitting Governor", fontsize=14)
ax.set_ylabel("Normalized Vote Share of Sitting Governor", fontsize=14)
ax.set_title('Twitter Follower Share vs Vote Share of Sitting US Governors', fontsize=18)

# Same value as the off-diagonal entry of df[['twshare','shareGov']].corr(),
# computed without the removed `.ix` indexer.
share_corr = df['twshare'].corr(df['shareGov'])
ax.text(0.01, 0.99,
        'Correlation between Twitter share and vote share : ' + '{:2.2f}'.format(share_corr),
        horizontalalignment='left', verticalalignment='top',
        transform=ax.transAxes, fontsize=12);
In [9]:
# Hierarchical (complete-linkage) clustering of the states in
# (twshare, shareGov) space, drawn as a dendrogram.
from scipy.cluster.hierarchy import linkage, dendrogram
from scipy.spatial.distance import pdist, squareform

# Condensed pairwise Euclidean distances between states.
dist_condensed = pdist(df[['twshare', 'shareGov']], metric='euclidean')
# Square form kept under its original name for inspection only.
distxy = squareform(dist_condensed)

ax = plt.subplot(111)
ax.figure.set_size_inches(5, 10)

# linkage() expects the *condensed* distance vector. A 2-D array is treated
# as a set of observation vectors, so passing the square matrix would
# cluster the rows of the distance matrix instead of the states themselves.
l = linkage(dist_condensed, method='complete')
dendrogram(l, labels=df['state'].tolist(), orientation='right');
In [10]:
# States whose incumbent governors have fewer followers than their challengers.
challenger_ahead = df['folgov'] < df['folch']
df.loc[challenger_ahead]
Out[10]:
In [84]:
import json
from collections import Counter

# Load the geocoded follower data; the context manager closes the file
# handle, which the bare open() call left dangling.
with open('../data/geocoded.json') as fh:
    f = json.load(fh)

# For every top-level entry, 'flocs' is folded into two counters:
#   c2 -- number of top-level entries in which each 'flocs' key appears
#   c  -- sum of the 'flocs' values per key across all entries
# (presumably keys are "lat,lon" strings and values follower counts -- the
#  later cells parse the keys that way; verify against the data file)
c2 = Counter()
c = Counter()
for entry in f.values():
    flocs = entry['flocs']
    c2.update(flocs.keys())
    c.update(flocs)

c2.most_common(10)
Out[84]:
In [112]:
# Rank plot: counts stored in `c2`, sorted from largest to smallest.
fig, ax = plt.subplots(figsize=(16, 8))

ranked_counts = sorted(c2.values(), reverse=True)
ax.scatter(range(len(ranked_counts)), ranked_counts)

# Pad the x-range on both sides and leave one unit of headroom above the
# largest count.
ax.set_xlim(-100, len(c2) + 100)
ax.set_ylim(0, c2.most_common(1)[0][1] + 1)

ax.set_xlabel('Cities (ranked)')
ax.set_ylabel('Unique governors followed')
ax.set_title('Number of Unique Governors Followed by US cities');
In [77]:
# Log-log rank plot of the `c2` counts.
# Ranks start at 1: a rank of 0 has no position on a logarithmic axis, so the
# top-ranked entry would otherwise be missing from the curve.
ranked_counts = sorted(c2.values(), reverse=True)
ranks = range(1, len(ranked_counts) + 1)
plt.loglog(ranks, ranked_counts)
plt.xlabel('Cities (ranked)')
plt.ylabel('Unique governors followed');
In [122]:
import powerlaw

# Fit a power law to the per-location totals stored in `c` and report the
# fitted exponent together with its standard error.
follower_totals = list(c.values())
fit = powerlaw.Fit(follower_totals)
print(fit.alpha, fit.sigma)
# Optional model comparison, left disabled as in the original cell:
#print(fit.distribution_compare('power_law', 'lognormal'))
In [113]:
# Rank plot of the totals stored in `c`, largest first, on a log y-axis.
fig, ax = plt.subplots(figsize=(16, 8))

ranked_totals = sorted(c.values(), reverse=True)
ax.scatter(range(len(ranked_totals)), ranked_totals)
ax.set_yscale('log')

# Pad the x-range on both sides; the y-range starts just below 1 so a count
# of 1 stays visible on the log scale.
ax.set_xlim(-100, len(c) + 100)
ax.set_ylim(0.9, c.most_common(1)[0][1] + 5000)

ax.set_xlabel('Cities (ranked)')
ax.set_ylabel('Residents following at least one governor')
ax.set_title('Total Number of Residents Following at least one Governor');
In [115]:
# http://code.xster.net/pygeocoder/wiki/Home
from pygeocoder import Geocoder

# Reverse-geocode the ten most common keys of `c2` (fewer if `c2` is smaller).
# Iterating over most_common(10) directly avoids re-sorting the counter on
# every pass and cannot raise IndexError when fewer than ten keys exist.
for loc, n_govs in c2.most_common(10):
    # Keys are parsed as "lat,lon"; only the first two fields are used.
    lat, lon = loc.split(',')[:2]
    results = Geocoder.reverse_geocode(float(lat), float(lon))
    print(results, '\t # of govs followed:', n_govs)
In [13]:
# One circle per key of `c2`, with the radius scaled by its count.
states = folium.Map(location=[40, -99], zoom_start=4, tiles='Mapbox Bright')
for loc, n_govs in c2.items():
    # Keys are "lat,lon" strings; hand folium numeric coordinates (as the
    # geocoding cells already do) instead of raw string fragments.
    lat, lon = loc.split(',')[:2]
    states.circle_marker(location=[float(lat), float(lon)], radius=n_govs*10)
states.create_map(path='unique_govs.html')
# Embed the generated page in the notebook output.
HTML('<iframe src="unique_govs.html" style="width: 100%; height: 500px; border: none"></iframe>')
Out[13]:
In [14]:
import json
from collections import Counter

# Reload the geocoded follower data; the context manager closes the file
# handle, which the bare open() call left dangling.
with open('../data/geocoded.json') as fh:
    f = json.load(fh)

# c -- sum of the 'flocs' values per key across all top-level entries.
c = Counter()
for entry in f.values():
    c.update(entry['flocs'])
In [15]:
# http://code.xster.net/pygeocoder/wiki/Home
from pygeocoder import Geocoder

# Reverse-geocode the ten keys of `c` with the largest totals (fewer if `c`
# is smaller). Iterating over most_common(10) directly avoids re-sorting the
# counter on every pass and cannot raise IndexError on a short counter.
for loc, n_followers in c.most_common(10):
    # Keys are parsed as "lat,lon".
    lat, lon = loc.split(',')
    results = Geocoder.reverse_geocode(float(lat), float(lon))
    print(results, '\t # of followers:', n_followers)
In [16]:
# One circle per key of `c`, with the radius equal to its total.
states = folium.Map(location=[40, -99], zoom_start=4, tiles='Mapbox Bright')
for loc, n_followers in c.items():
    # Keys are "lat,lon" strings; hand folium numeric coordinates (as the
    # geocoding cells already do) instead of raw string fragments.
    lat, lon = loc.split(',')[:2]
    states.circle_marker(location=[float(lat), float(lon)], radius=n_followers)
states.create_map(path='all_followers.html')
# Embed the generated page in the notebook output.
HTML('<iframe src="all_followers.html" style="width: 100%; height: 500px; border: none"></iframe>')
Out[16]:
In [17]:
from mpl_toolkits.basemap import Basemap
from matplotlib.path import Path

# Mercator projection used to map (lon, lat) to planar coordinates.
# http://matplotlib.org/basemap/users/merc.html
m = Basemap(
    projection='merc',
    llcrnrlat=-80, urcrnrlat=80,
    llcrnrlon=-180, urcrnrlon=180,
    lat_ts=20,
    resolution='c',
)

# Polygon vertices, stored as [lat, lon].
p = [[25.774252, -80.190262], [18.466465, -66.118292], [32.321384, -64.75737]]

# The projection is called with (lon, lat), hence the swapped order.
p_projected = [m(lon, lat) for lat, lon in p]
p_path = Path(p_projected)

# Two test points, also stored as [lat, lon].
p1 = [27.254629577800088, -76.728515625]
p2 = [27.254629577800088, -74.928515625]

# Project the test points the same way as the polygon vertices.
p1_projected, p2_projected = (m(lon, lat) for lat, lon in (p1, p2))

# Report, for each test point, whether it lies inside the projected polygon.
for projected_point in (p1_projected, p2_projected):
    print(p_path.contains_point(projected_point))