by Talha Oz & Manqi Li
This notebook is a demo of geocoding the followers of a governor, namely Jack Dalrymple, the governor of North Dakota. It can also be considered an intermediate verification step before scaling to all 50 states.
The JSON file we read in contains only the IDs of the governor's followers and is generated by DD-CSS.
In [1]:
cd ../
In [3]:
import pandas as pd
import json
#from tw import *
import twitter
from utilities.geocoder import Geocoder
from collections import Counter
In [4]:
# Read the IDs of the followers of the governor of North Dakota.
# This file is retrieved from dd-css.com.
filename = '5522fdf6bd90594f049c4aef.json'

# Open the file in a context manager so the handle is closed as soon as
# the JSON has been parsed (the bare open() previously leaked it).
with open('data/'+filename) as followers_file:
    followers = json.load(followers_file)

# The follower IDs are stored under data -> followers in the loaded JSON.
fids = followers['data']['followers']

# Last expression of the cell: display the screen name recorded in the file's parameters.
followers['parameters']['screen_name']
Out[4]:
In [ ]:
# Connect to twitter and retrieve followers' information
# https://dev.twitter.com/rest/reference/get/users/lookup
#
# NOTE(review): oauth_login is not defined or imported in the visible cells;
# presumably it came from the commented-out `from tw import *` -- confirm
# before running this cell on a fresh kernel.
resp = []
twitter_api = oauth_login()

# Request the profiles in slices of 100 IDs. Stepping the start offset by 100
# never produces an empty slice, whereas range(1+len(fids)//100) issued one
# extra request with no IDs whenever len(fids) was a multiple of 100
# (including when fids was empty).
for start in range(0, len(fids), 100):
    resp.extend(twitter_api.users.lookup(user_id=fids[start:start+100]))
In [5]:
# Write the lookup response to disk as JSON, next to the input file
# (same file name, prefixed with 'resp_').
with open('data/resp_'+filename, 'w') as resp_out:
    resp_out.write(json.dumps(resp))
In [ ]:
# Pull the profile `location` field out of every entry of the lookup response.
# Entries whose location is missing, None, or the empty string are skipped, so
# `locations` only ever holds non-empty values (a None slipping through would
# break the later `.strip()` call when the locations are geocoded).
locations = []
for r in resp:
    loc = r.get('location')
    if loc:
        locations.append(loc)
        print(loc, end='; ')

# `i` is kept as the count of non-empty locations, as in the original cell.
i = len(locations)
print("\nNumber of non-empty location info in user profiles: ",i)

# Report 0 % instead of dividing by zero when the response is empty.
rate = i*100/len(resp) if resp else 0.0
print('Rate of non-empty user-profile location fields: {0:.2f} %'.format(rate))
In [ ]:
# Bundle the governor's screen name with the extracted locations and write
# the pair to disk as JSON (same file name, prefixed with 'loc_').
governor = dict(name=followers['parameters']['screen_name'], locs=locations)
with open('data/loc_'+filename, 'w') as loc_out:
    loc_out.write(json.dumps(governor))
In [6]:
# Read the saved locations back from disk.
# Note that `filename` and `followers` are rebound here: from this cell on,
# `followers` is the content of the 'loc_' file rather than the original
# follower-ID file.
filename = 'loc_5522fdf6bd90594f049c4aef.json'

# Context manager closes the handle once parsing is done (the bare open()
# previously left it open).
with open('data/'+filename) as followers_file:
    followers = json.load(followers_file)

# Last expression of the cell: show which top-level keys were loaded.
followers.keys()
Out[6]:
In [7]:
# Geocode every stored location string and count how often each coordinate pair occurs.
gc = Geocoder('utilities/geodata/state_abbr_file', 'utilities/geodata/city_file')

latlon = []
for f in followers['locs']:
    # Surrounding whitespace is stripped before the string is handed to the geocoder.
    point = gc.geocode(f.strip())
    # A None result is skipped (presumably it means the location could not be
    # resolved -- confirm against Geocoder). Identity comparison is the
    # idiomatic None test and cannot be affected by a custom __ne__.
    if point is not None:
        # Keep only the first two components of the result as the coordinate pair.
        latlon.append((point[0], point[1]))

# Tally identical coordinate pairs; the sum of the counts is the number of
# locations that were geocoded.
cnt = Counter(latlon)
print('Number of locations geocoded:',sum(cnt.values()))

# Last expression of the cell: coordinate pairs ordered from most to least frequent.
cnt.most_common()
Out[7]:
In [8]:
from IPython.display import HTML

# Embed the heat map as an iframe. The markup is written as adjacent string
# literals (joined by Python at compile time) purely for readability; the
# resulting string is unchanged.
# NOTE(review): the embed points at Google Fusion Tables, which has reportedly
# been retired -- confirm whether this iframe still renders.
HTML(
    '<iframe width="1000" height="600" scrolling="no" frameborder="no" '
    'src="https://www.google.com/fusiontables/embedviz'
    '?q=select+col0+from+1mGLpmSCTW6wK07tL0xkd-nmvY7uLJzsl4hd0sRHD+limit+1000'
    '&viz=HEATMAP&h=true'
    '&lat=40.33704203649286&lng=-95.15016500000002'
    '&t=1&z=5&l=col0&y=2&tmplt=2&hmd=true'
    '&hmg=%2366ff0000%2C%2393ff00ff%2C%23c1ff00ff%2C%23eeff00ff'
    '%2C%23f4e300ff%2C%23f4e300ff%2C%23f9c600ff%2C%23ffaa00ff'
    '%2C%23ff7100ff%2C%23ff3900ff%2C%23ff0000ff'
    '&hmo=0.6&hmr=25&hmw=0&hml=ONE_COL_LAT_LNG"></iframe>'
)
Out[8]: