GeoInfluence of Governors

by Talha Oz & Manqi Li

This notebook is a demo of geocoding the followers of a governor, namely Jack Dalrymple, the governor of North Dakota. It can also be considered an intermediate verification step before scaling to all 50 states.

The JSON file we read in contains only the IDs of the governor's followers and was generated by DD-CSS.


In [1]:
cd ../


C:\Users\Talha\Documents\WinPython3\projects\GeoInfluence

In [3]:
import pandas as pd
import json
#from tw import *
import twitter
from utilities.geocoder import Geocoder
from collections import Counter

In [4]:
# read the IDs of the followers of the governor of North Dakota
# this file is retrieved from dd-css.com
filename = '5522fdf6bd90594f049c4aef.json'
# the context manager closes the file handle even if JSON parsing fails
with open('data/'+filename) as followers_file:
    followers = json.load(followers_file)
# list of follower IDs used by the profile lookup
fids = followers['data']['followers']
# show whose followers these are (displayed as the cell output)
followers['parameters']['screen_name']


Out[4]:
'NDGovDalrymple'

In [ ]:
# Connect to twitter and retrieve followers' information
# https://dev.twitter.com/rest/reference/get/users/lookup
# NOTE(review): oauth_login is not provided by the visible imports (the
# `from tw import *` line is commented out) — confirm where it is defined.
resp = []
twitter_api = oauth_login()
# The lookup is issued in batches of 100 IDs. Stepping by the batch size
# (rather than looping 1+len(fids)//100 times) never sends a trailing empty
# batch when len(fids) is zero or an exact multiple of 100.
for start in range(0, len(fids), 100):
    resp.extend(twitter_api.users.lookup(user_id=fids[start:start+100]))

In [5]:
# Persist the raw lookup response next to the input file (prefixed with 'resp_').
# Requires `resp` from the lookup cell above; it raises NameError if that cell was not run.
with open('data/resp_' + filename, mode='w') as resp_file:
    json.dump(resp, resp_file)


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-5-0f68fc4df566> in <module>()
      1 # let's save this response
      2 with open('data/resp_'+filename, 'w') as outfile:
----> 3     json.dump(resp, outfile)

NameError: name 'resp' is not defined

In [ ]:
# let's get the locations out of this JSON response
# Keep only profiles whose location is non-empty. A truthiness test (instead
# of `!= ''`) also skips a missing or null field, which would otherwise be
# collected as None and fail on `.strip()` when the locations are geocoded.
locations = [r['location'] for r in resp if r.get('location')]
for loc in locations:
    print(loc, end='; ')
print("\nNumber of non-empty location info in user profiles: ", len(locations))
# An empty response has no meaningful rate; avoid the ZeroDivisionError.
if resp:
    print('Rate of non-empty user-profile location fields: {0:.2f} %'.format(len(locations)*100/len(resp)))
else:
    print('No user profiles in the response; location rate is undefined.')

In [ ]:
# Bundle the governor's screen name with the collected profile locations
# and write them to 'data/loc_<input file name>' as JSON.
governor = dict(name=followers['parameters']['screen_name'], locs=locations)
with open('data/loc_' + filename, mode='w') as loc_file:
    json.dump(governor, loc_file)

In [6]:
# read in the locations back
# NOTE: this rebinds `filename` and `followers` to the saved locations file,
# so the cells above must not be re-run after this one without a restart.
filename = 'loc_5522fdf6bd90594f049c4aef.json'
# the context manager closes the file handle even if JSON parsing fails
with open('data/'+filename) as followers_file:
    followers = json.load(followers_file)
followers.keys()


Out[6]:
dict_keys(['locs', 'name'])

In [7]:
# Build the project geocoder from its state-abbreviation and city data files.
gc = Geocoder('utilities/geodata/state_abbr_file', 'utilities/geodata/city_file')
# Geocode every saved profile location, keeping only those that yield a point.
# NOTE(review): geocode() presumably returns None for unresolvable text — confirm.
latlon = []
for f in followers['locs']:
    point = gc.geocode(f.strip())
    # identity comparison is the idiomatic None test and ignores any custom __ne__
    if point is not None:
        latlon.append((point[0], point[1]))
# Tally how many followers fall on each coordinate pair.
cnt = Counter(latlon)
print('Number of locations geocoded:',sum(cnt.values()))
cnt.most_common()


Number of locations geocoded: 115
Out[7]:
[(('46.813343', '-100.779004'), 35),
 (('46.871414', '-96.808658'), 19),
 (('38.913611', '-77.013222'), 18),
 (('44.964465', '-93.268198'), 5),
 (('48.233190', '-101.292229'), 3),
 (('46.864563', '-96.758707'), 3),
 (('46.828893', '-100.891138'), 2),
 (('44.367966', '-100.336378'), 2),
 (('38.880344', '-77.108260'), 2),
 (('48.156477', '-103.628064'), 2),
 (('46.883575', '-102.788811'), 2),
 (('40.352206', '-74.657071'), 1),
 (('48.961469', '-101.631889'), 1),
 (('46.780373', '-92.117079'), 1),
 (('31.056125', '-95.129322'), 1),
 (('30.451800', '-84.272770'), 1),
 (('46.595805', '-112.027031'), 1),
 (('40.123496', '-82.921432'), 1),
 (('46.333108', '-113.296564'), 1),
 (('39.896280', '-75.035908'), 1),
 (('41.684200', '-71.268669'), 1),
 (('48.795416', '-97.623259'), 1),
 (('39.726287', '-104.965486'), 1),
 (('47.673341', '-117.410271'), 1),
 (('38.816242', '-77.071282'), 1),
 (('33.050374', '-96.745935'), 1),
 (('42.516564', '-89.029964'), 1),
 (('44.410322', '-103.518486'), 1),
 (('35.357276', '-119.031661'), 1),
 (('47.268088', '-101.777229'), 1),
 (('40.441419', '-79.977292'), 1),
 (('33.590709', '-111.895985'), 1),
 (('33.528370', '-112.076300'), 1)]

In [8]:
from IPython.display import HTML
# Embed the Fusion Tables heat map of the geocoded followers.
# The markup is split into adjacent literals purely for readability; they
# concatenate to the exact original string.
# NOTE(review): Google Fusion Tables has been retired, so this embed likely
# no longer renders — confirm and replace with a maintained map if needed.
HTML(
    '<iframe width="1000" height="600" scrolling="no" frameborder="no" '
    'src="https://www.google.com/fusiontables/embedviz'
    '?q=select+col0+from+1mGLpmSCTW6wK07tL0xkd-nmvY7uLJzsl4hd0sRHD+limit+1000'
    '&amp;viz=HEATMAP&amp;h=true'
    '&amp;lat=40.33704203649286&amp;lng=-95.15016500000002'
    '&amp;t=1&amp;z=5&amp;l=col0&amp;y=2&amp;tmplt=2&amp;hmd=true'
    '&amp;hmg=%2366ff0000%2C%2393ff00ff%2C%23c1ff00ff%2C%23eeff00ff%2C%23f4e300ff%2C%23f4e300ff'
    '%2C%23f9c600ff%2C%23ffaa00ff%2C%23ff7100ff%2C%23ff3900ff%2C%23ff0000ff'
    '&amp;hmo=0.6&amp;hmr=25&amp;hmw=0&amp;hml=ONE_COL_LAT_LNG"></iframe>'
)


Out[8]: