In [26]:
# Imports needed for this IPython notebook
from collections import Counter
import ConfigParser
import matplotlib.pyplot as plt
import networkx as nx
import sys
import time
import pickle
import os
from TwitterAPI import TwitterAPI
%matplotlib inline
In [27]:
def get_twitter(config_file):
    """ Construct an instance of TwitterAPI from stored credentials.
    Args:
        config_file ... A config file in ConfigParser (INI) format with Twitter credentials.
    Returns:
        An instance of TwitterAPI.
    """
    config = ConfigParser.ConfigParser()
    config.read(config_file)
    twitter = TwitterAPI(
        config.get('twitter', 'consumer_key'),
        config.get('twitter', 'consumer_secret'),
        config.get('twitter', 'access_token'),
        config.get('twitter', 'access_token_secret'))
    return twitter
twitter = get_twitter('twitter.cfg')
print('Established Twitter connection.')
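get_twitter() expects twitter.cfg to be an INI-style file with a [twitter] section containing the four keys read above. A minimal example with placeholder values (not real credentials):

[twitter]
consumer_key = YOUR_CONSUMER_KEY
consumer_secret = YOUR_CONSUMER_SECRET
access_token = YOUR_ACCESS_TOKEN
access_token_secret = YOUR_ACCESS_TOKEN_SECRET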
In [28]:
def robust_request(twitter, resource, params, max_tries=5):
    """ If a Twitter request fails, sleep for 15 minutes.
    Do this at most max_tries times before quitting.
    Args:
        twitter .... A TwitterAPI object.
        resource ... A resource string to request.
        params ..... A parameter dictionary for the request.
        max_tries .. The maximum number of tries to attempt.
    Returns:
        A TwitterResponse object on success, -1 if the account is not
        authorized (protected), -2 if the page does not exist (deleted or
        suspended), or None if all tries fail.
    """
    for i in range(max_tries):
        try:
            request = twitter.request(resource, params)
        except Exception:
            # Connection dropped: wait out the window, rebuild the
            # connection, and retry the request once.
            print >> sys.stderr, 'Got connection error: sleeping for 15 minutes.'
            sys.stderr.flush()
            time.sleep(61 * 15)
            twitter = get_twitter('twitter.cfg')
            request = twitter.request(resource, params)
        if request.status_code == 200:
            return request
        elif "Not authorized" in request.text:
            return -1
        elif "page does not exist" in request.text:
            return -2
        else:
            # Most likely a rate-limit error: sleep until the window resets.
            print >> sys.stderr, 'Got error:', request.text, '\nsleeping for 15 minutes.'
            sys.stderr.flush()
            time.sleep(61 * 15)
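As a quick smoke test, robust_request can be exercised directly; the cell below is a hypothetical example using the users/lookup resource (also used later) and the caltechalumni account collected further down.

In [ ]:
# Hypothetical smoke test for robust_request: look up one account and
# report its follower count. Prints nothing if the request failed.
response = robust_request(twitter, 'users/lookup', {'screen_name': 'caltechalumni'})
if response not in (None, -1, -2):
    user = response.json()[0]
    print "%s has %d followers" % (user['screen_name'], user['followers_count'])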
In [29]:
def get_followers(screen_name):
    """ Return a dictionary of the users that follow this person on Twitter.
    Args:
        screen_name: a string of a Twitter screen name
    Returns:
        A dictionary of dictionaries, one per follower, as {screen_name: user_info} pairs.
    Note: followers/list returns at most 200 users per request, so we page
    through the results with the cursor until every follower is collected.
    """
    followers = {}
    cursor = -1
    count = 0
    rate_limit = 1
    while True:
        # followers/list allows 15 requests per 15-minute window.
        if rate_limit == 15:
            rate_limit = 1
            print >> sys.stderr, 'Avoided rate_limit error: \nsleeping for 15 minutes.'
            time.sleep(61 * 15)
        request = robust_request(twitter, 'followers/list',
                                 {'screen_name': screen_name, 'count': 200, 'cursor': cursor})
        # Stop if the account is protected, deleted, or the request failed.
        if request in (None, -1, -2):
            break
        json_response = request.json()
        # Add up to 200 followers from this page of results.
        for follower in json_response['users']:
            followers[follower['screen_name']] = follower
            count = count + 1
        # A next_cursor of 0 means this was the last page of followers.
        if json_response["next_cursor"] == 0:
            break
        # Advance the cursor to fetch the next 200 followers.
        cursor = json_response["next_cursor"]
        rate_limit = rate_limit + 1
    print "Collected %d followers" % count
    return followers
In [30]:
def get_follower_objects(screen_names):
    """ Look up full user objects for a comma-separated list of screen names.
    Args:
        screen_names ... A comma-separated string of up to 100 Twitter screen names.
    Returns:
        A dictionary of {screen_name: user_info} pairs, one per resolved user.
    """
    follower_objects = {}
    # users/lookup resolves up to 100 comma-separated screen names in one
    # request, so a single robust_request call is enough here.
    request = robust_request(twitter, 'users/lookup', {'screen_name': screen_names})
    if request in (None, -1, -2):
        return follower_objects
    count = 0
    for obj in request:
        follower_objects[obj['screen_name']] = obj
        count = count + 1
    print "Collected %d user objects" % count
    return follower_objects
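A hypothetical call with two placeholder screen names; note that the argument is a single comma-separated string, not a Python list.

In [ ]:
# Hypothetical example: the screen names below are placeholders.
objects = get_follower_objects('caltechalumni,Caltech')
for name, info in objects.items():
    print "%s -> %d followers" % (name, info['followers_count'])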
In [31]:
def get_followers_and_pickle_object(account_name):
    """ Calls get_followers() to collect all followers of a Twitter account
    and then pickles the result for future processing.
    Args:
        account_name ... A string representing the screen_name of the Twitter
                         account whose followers we need.
    Returns:
        A dictionary of {screen_name: user_info} pairs of followers, where
        user_info is a dictionary containing that user's information.
    """
    followers = get_followers(account_name)
    with open("raw_data_" + account_name, 'wb') as fileObject:
        pickle.dump(followers, fileObject)
    return followers
In [32]:
def get_followers_from_pickle(account_name, path="raw_data_"):
    """ Return all followers of a Twitter account by loading the data
    previously saved with pickle.
    Args:
        account_name ... A string representing the screen_name of the Twitter
                         account whose followers we need.
        path ........... A string prefix for the path of the pickled files.
    Returns:
        A dictionary of {screen_name: user_info} pairs of followers, where
        user_info is a dictionary containing that user's information.
    """
    # Pickled data must be read back in binary mode.
    with open(path + account_name, 'rb') as fileObject:
        followers = pickle.load(fileObject)
    return followers
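Once a raw_data_<account> file has been written (as in the collection cell below), the saved followers can be reloaded without hitting the API again; a minimal sketch:

In [ ]:
# Sketch: reload previously pickled followers and confirm the count.
# Assumes get_followers_and_pickle_object('caltechalumni') has already run.
reloaded = get_followers_from_pickle('caltechalumni')
print "Reloaded %d followers from disk" % len(reloaded)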
In [33]:
def get_follower_objects_and_pickle_object(account_name):
    """ Calls get_follower_objects() and pickles the result into a file
    named raw_data_<account_name>_objects.
    """
    followers = get_follower_objects(account_name)
    with open("raw_data_" + account_name + "_objects", 'wb') as fileObject:
        pickle.dump(followers, fileObject)
    return followers
In [35]:
caltechalumni_followers = get_followers_and_pickle_object("caltechalumni")
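A quick, hypothetical sanity check on the collected data; 'screen_name', 'followers_count', and 'location' are standard fields of the Twitter user objects stored as values.

In [ ]:
# Hypothetical check: report the total and peek at one follower record.
print "Total followers collected: %d" % len(caltechalumni_followers)
sample = next(iter(caltechalumni_followers.values()))
print "%s | %d followers | location: %s" % (
    sample['screen_name'], sample['followers_count'], sample['location'])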
In [ ]:
Caltech_Alumni_pickle = get_followers_and_pickle_object("Caltech_Alumni")