In [26]:
# Imports needed for this IPython notebook
from collections import Counter
import ConfigParser
import matplotlib.pyplot as plt
import networkx as nx
import sys
import time
import pickle
import os
from TwitterAPI import TwitterAPI
%matplotlib inline
In [27]:
def get_twitter(config_file):
    """ Construct an instance of TwitterAPI from stored credentials.
    Args:
        config_file ... A config file in ConfigParser (INI) format with Twitter credentials.
    Returns:
        An instance of TwitterAPI.
    """
    config = ConfigParser.ConfigParser()
    config.read(config_file)
    twitter = TwitterAPI(
        config.get('twitter', 'consumer_key'),
        config.get('twitter', 'consumer_secret'),
        config.get('twitter', 'access_token'),
        config.get('twitter', 'access_token_secret'))
    return twitter
twitter = get_twitter('twitter.cfg')
print('Established Twitter connection.')
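get_twitter() expects twitter.cfg to be an INI-style file with a [twitter] section containing the four keys read above. A minimal example with placeholder values (not real credentials):

[twitter]
consumer_key = YOUR_CONSUMER_KEY
consumer_secret = YOUR_CONSUMER_SECRET
access_token = YOUR_ACCESS_TOKEN
access_token_secret = YOUR_ACCESS_TOKEN_SECRET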
In [28]:
def robust_request(twitter, resource, params, max_tries=5):
    """ If a Twitter request fails, sleep for 15 minutes.
    Do this at most max_tries times before quitting.
    Args:
        twitter .... A TwitterAPI object.
        resource ... A resource string to request.
        params ..... A parameter dictionary for the request.
        max_tries .. The maximum number of tries to attempt.
    Returns:
        A TwitterResponse object on success, -1 if the account is not
        authorized (protected), -2 if the page does not exist (deleted or
        suspended), or None if all tries fail.
    """
    for i in range(max_tries):
        try:
            request = twitter.request(resource, params)
        except Exception:
            # Connection dropped: wait out the window, rebuild the
            # connection, and retry the request once.
            print >> sys.stderr, 'Got connection error: sleeping for 15 minutes.'
            sys.stderr.flush()
            time.sleep(61 * 15)
            twitter = get_twitter('twitter.cfg')
            request = twitter.request(resource, params)
        if request.status_code == 200:
            return request
        elif "Not authorized" in request.text:
            return -1
        elif "page does not exist" in request.text:
            return -2
        else:
            # Most likely a rate-limit error: sleep until the window resets.
            print >> sys.stderr, 'Got error:', request.text, '\nsleeping for 15 minutes.'
            sys.stderr.flush()
            time.sleep(61 * 15)
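As a quick smoke test, robust_request can be exercised directly; the cell below is a hypothetical example using the users/lookup resource (also used later) and the caltechalumni account collected further down.

In [ ]:
# Hypothetical smoke test for robust_request: look up one account and
# report its follower count. Prints nothing if the request failed.
response = robust_request(twitter, 'users/lookup', {'screen_name': 'caltechalumni'})
if response not in (None, -1, -2):
    user = response.json()[0]
    print "%s has %d followers" % (user['screen_name'], user['followers_count'])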
In [29]:
def get_followers(screen_name):
    """ Return a dictionary of the users that follow this person on Twitter.
    Args:
        screen_name: a string of a Twitter screen name
    Returns:
        A dictionary of dictionaries, one per follower, as {screen_name: user_info} pairs.
    Note: followers/list returns at most 200 users per request, so we page
    through the results with the cursor until every follower is collected.
    """
    followers = {}
    cursor = -1
    count = 0
    rate_limit = 1
    while True:
        # followers/list allows 15 requests per 15-minute window.
        if rate_limit == 15:
            rate_limit = 1
            print >> sys.stderr, 'Avoided rate_limit error: \nsleeping for 15 minutes.'
            time.sleep(61 * 15)
        request = robust_request(twitter, 'followers/list',
                                 {'screen_name': screen_name, 'count': 200, 'cursor': cursor})
        # Stop if the account is protected, deleted, or the request failed.
        if request in (None, -1, -2):
            break
        json_response = request.json()
        # Add up to 200 followers from this page of results.
        for follower in json_response['users']:
            followers[follower['screen_name']] = follower
            count = count + 1
        # A next_cursor of 0 means this was the last page of followers.
        if json_response["next_cursor"] == 0:
            break
        # Advance the cursor to fetch the next 200 followers.
        cursor = json_response["next_cursor"]
        rate_limit = rate_limit + 1
    print "Collected %d followers" % count
    return followers
In [30]:
def get_follower_objects(screen_names):
    """ Look up full user objects for a comma-separated list of screen names.
    Args:
        screen_names ... A comma-separated string of up to 100 Twitter screen names.
    Returns:
        A dictionary of {screen_name: user_info} pairs, one per resolved user.
    """
    follower_objects = {}
    # users/lookup resolves up to 100 comma-separated screen names in one
    # request, so a single robust_request call is enough here.
    request = robust_request(twitter, 'users/lookup', {'screen_name': screen_names})
    if request in (None, -1, -2):
        return follower_objects
    count = 0
    for obj in request:
        follower_objects[obj['screen_name']] = obj
        count = count + 1
    print "Collected %d user objects" % count
    return follower_objects
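A hypothetical call with two placeholder screen names; note that the argument is a single comma-separated string, not a Python list.

In [ ]:
# Hypothetical example: the screen names below are placeholders.
objects = get_follower_objects('caltechalumni,Caltech')
for name, info in objects.items():
    print "%s -> %d followers" % (name, info['followers_count'])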
In [31]:
def get_followers_and_pickle_object(account_name):
    """ Calls get_followers() to collect all followers of a Twitter account
    and then pickles the result for future processing.
    Args:
        account_name ... A string representing the screen_name of the Twitter
                         account whose followers we need.
    Returns:
        A dictionary of {screen_name: user_info} pairs of followers, where
        user_info is a dictionary containing that user's information.
    """
    followers = get_followers(account_name)
    with open("raw_data_" + account_name, 'wb') as fileObject:
        pickle.dump(followers, fileObject)
    return followers
In [32]:
def get_followers_from_pickle(account_name, path="raw_data_"):
    """ Return all followers of a Twitter account by loading the data
    previously saved with pickle.
    Args:
        account_name ... A string representing the screen_name of the Twitter
                         account whose followers we need.
        path ........... A string prefix for the path of the pickled files.
    Returns:
        A dictionary of {screen_name: user_info} pairs of followers, where
        user_info is a dictionary containing that user's information.
    """
    # Pickled data must be read back in binary mode.
    with open(path + account_name, 'rb') as fileObject:
        followers = pickle.load(fileObject)
    return followers
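Once a raw_data_<account> file has been written (as in the collection cell below), the saved followers can be reloaded without hitting the API again; a minimal sketch:

In [ ]:
# Sketch: reload previously pickled followers and confirm the count.
# Assumes get_followers_and_pickle_object('caltechalumni') has already run.
reloaded = get_followers_from_pickle('caltechalumni')
print "Reloaded %d followers from disk" % len(reloaded)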
In [33]:
def get_follower_objects_and_pickle_object(account_name):
    """ Calls get_follower_objects() and pickles the result into a file
    named raw_data_<account_name>_objects.
    """
    followers = get_follower_objects(account_name)
    with open("raw_data_" + account_name + "_objects", 'wb') as fileObject:
        pickle.dump(followers, fileObject)
    return followers
In [35]:
caltechalumni_followers = get_followers_and_pickle_object("caltechalumni")
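A quick, hypothetical sanity check on the collected data; 'screen_name', 'followers_count', and 'location' are standard fields of the Twitter user objects stored as values.

In [ ]:
# Hypothetical check: report the total and peek at one follower record.
print "Total followers collected: %d" % len(caltechalumni_followers)
sample = next(iter(caltechalumni_followers.values()))
print "%s | %d followers | location: %s" % (
    sample['screen_name'], sample['followers_count'], sample['location'])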
In [ ]:
Caltech_Alumni_pickle = get_followers_and_pickle_object("Caltech_Alumni")