notebook.community

Edit and run



In [1]:

    
#!/usr/bin/env python 
#The above line is not needed here

'''
This script accesses twitter through [tweepy's] Oauth mechanism and reads the authenticating user's data. 
Formats it into a markdown file for consumption with jekyll.
Essentially the aim is to mirror the user's tweets in case twitter goes bonkers.
'''

# Imports
import tweepy #to talk to twitter through oauth. 85 contributors as of Nov 2014! https://github.com/tweepy/tweepy
import os  #do source the appropriate environ variables (TWITTER_X below) using a source call. See readme.md
    
# Consumer keys and access tokens, used for OAuth will be read from environ variables
 
# OAuth process, using the keys and tokens
# Short alias for the environment lookup; it yields None when a variable is unset.
read_env = os.environ.get

# OAuth process: the consumer pair builds the handler, the access pair is attached after.
auth = tweepy.OAuthHandler(
    consumer_key=read_env('TWITTER_CONSUMER_KEY'),
    consumer_secret=read_env('TWITTER_CONSUMER_SECRET'),
)
auth.set_access_token(
    key=read_env('TWITTER_ACCESS_TOKEN'),
    secret=read_env('TWITTER_ACCESS_TOKEN_SECRET'),
)

# Creation of the actual interface, using authentication
api = tweepy.API(auth_handler=auth, secure=True, retry_count=3) #wait_on_rate_limit

# Profile of the authenticating user; used below as a quick check that the login worked.
user = api.me()

print('Name: ' + user.name)
print('Location: ' + user.location)
#print('Friends: ' + str(user.friends_count))









    



Name: theja tulabandhula
Location: Cambridge MA



In [2]:

    
#Fetching my tweets. A single user_timeline request is capped at 200 tweets, so asking for
#count=3200 in one call cannot return more than one page. Page through the timeline with a
#Cursor instead, stopping at 3200 items (the most the timeline endpoint is documented to expose).
#http://tweepy.readthedocs.org/en/v2.3.0/api.html?highlight=user_timeline#API.user_timeline
user_tweets = list(
    tweepy.Cursor(
        api.user_timeline,          # pass the method itself; Cursor issues the requests
        screen_name='theja2t',
        count=200,                  # per-request page size
        include_rts=True,
    ).items(3200)
)



In [4]:

    
# Number of tweets held in user_tweets (the result of the user_timeline fetch).
len(user_tweets)









    Out[4]:





173



In [31]:

    
#Formatting for Jekyll: write the fetched tweets to a markdown post, one section per month.

import datetime  # required by datetime.date.today() below; was missing on a fresh kernel
import time

# The context manager closes the file even if one of the writes raises part-way through.
with open('../../_posts/2014-11-16-Theja-the-sparse-tweeter.md', 'wb') as f_twitter:  #hardcoded date of the article.
    # Jekyll front matter.
    f_twitter.write('---\n')
    f_twitter.write('layout: post\n')
    f_twitter.write('title:  "Theja\'s tweeting history using tweepy" \n')
    f_twitter.write('---\n\n')

    # Introductory paragraph, including the time at which this dump was generated.
    f_twitter.write('\n\n\nSince extracting my answers from stackexchange (only crossvalidated though), I thought I should do it for twitter as well.')
    f_twitter.write(' I used the tweepy package to get my tweets. ')
    f_twitter.write(' The python code is available as a notebook [here](http://nbviewer.ipython.org/github/thejat/thejat.github.io/blob/master/static/scripts/TwitterDump.ipynb). ')
    f_twitter.write(' This list was generated on '+time.asctime( time.localtime(time.time()) )+'. Hope you find this a good random walk.\n\n\n')

    # The first section heading is the current month; a new heading is started
    # whenever a tweet's "Month Year" label differs from the one being written.
    mmyy = datetime.date.today().strftime('%B %Y')
    f_twitter.write('\n------\n')
    f_twitter.write('\n###  ' + mmyy + ': ')
    for tweet in user_tweets:
        tweet_mmyy = tweet.created_at.strftime('%B %Y')
        if mmyy != tweet_mmyy:
            mmyy = tweet_mmyy
            f_twitter.write('------\n')
            f_twitter.write('\n###  ' + mmyy + ': ')
        f_twitter.write('\n - *Date ' + str(tweet.created_at) + '* > ')
        # The raw/endraw tags wrap the tweet text so Liquid does not interpret it;
        # the text is encoded to UTF-8 bytes before being written to the binary-mode file.
        f_twitter.write('{% raw %}' + tweet.text.encode('utf-8')+'\n{% endraw%}\n\n')



In [ ]:

    
'''
Below this is only for my own reference. Akin to backup slides :)
'''



In [36]:

    
#Misc 1: home_timeline to get my and my friends' data. If I only want my own data, I should use user_timeline()

#http://tweepy.readthedocs.org/en/v2.3.0/api.html?highlight=user_timeline#API.home_timeline

our_tweets = api.home_timeline(count = 5, include_rts = True)
for tweet in our_tweets: # if you want to print from old to new, use reversed() function
    # Call form of print, matching the first cell; a single argument prints the same on Python 2.
    print(tweet.text)









    



"In Japanese, you wouldn't say, "I go to the store." It would be more like "I to the store go." In other words, everyone talks like Yoda."
Naruto ended this week! Thank you to the creators and everyone for the hard work.
log(1+2+3)=log1+log2+log3
"Simple solutions are key to maintainability!" one of the most important keys to success.
'Just as Ramsey theory elucidates the impossibility of total disorder, discrepancy theory studies the deviations from total uniformity'-wiki






    Out[36]:





"\n#Prints a lot of text to screen\nstuff = api.user_timeline(screen_name = 'theja2t', count = 2, include_rts = True)\nfor status in stuff:\n    print status._json\n    print status.author, status.user\n    print dir(status)\n"



In [4]:

    
#Misc 2: Getting the authenticating user's public tweets, friends and mentions 
def fetch_public_tweets(api):
    """Return whatever ``api.home_timeline()`` yields for the authenticating user.

    :param api: object exposing a ``home_timeline()`` method (the tweepy API instance).
    :return: the value returned by ``api.home_timeline()``, unchanged.
    """
    return api.home_timeline()

def fetch_friends(api):
    """Return the friend ids reported by ``api.friends_ids()``.

    :param api: object exposing a ``friends_ids()`` method (the tweepy API instance).
    :return: the value returned by ``api.friends_ids()``, unchanged.
    """
    return api.friends_ids()

def fetch_mentions(api):
    """Return the mentions reported by ``api.mentions_timeline()``.

    :param api: object exposing a ``mentions_timeline()`` method (the tweepy API instance).
    :return: the value returned by ``api.mentions_timeline()``, unchanged.
    """
    return api.mentions_timeline()

# NOTE: `log` is defined in the "Misc 4" cell further down; run that cell before this one.
friends = fetch_friends(api)
replies = fetch_mentions(api)
public_tweets = fetch_public_tweets(api)

# Report how many items each call returned, as key=value pairs on one line.
log(
    at='fetched_from_api',
    friends=len(friends),
    mentions=len(replies),
    ptweets=len(public_tweets),
)









    



at=fetched_from_api friends=60 mentions=7 ptweets=20



In [25]:

    
#Misc 3: Show the rate Limits
'''
status = api.rate_limit_status()
dir(status)
'''



In [2]:

    
# Misc 4: Check if the environment variables for authentication exist
def log(**kwargs):
    """Print the keyword arguments as space-separated ``key=value`` pairs on one line.

    Pairs are emitted in sorted key order, so the output is stable regardless of
    the order in which the arguments were passed.

    :param kwargs: arbitrary keyword arguments to report.
    :return: None; the line is written to standard output.
    """
    # from https://github.com/chooper/twitter-repeater
    # Call form of print: a single argument prints identically on Python 2 and Python 3.
    print(' '.join("{0}={1}".format(k, v) for k, v in sorted(kwargs.items())))
    
def validate_env():
    """Verify that every Twitter credential environment variable is set and non-empty.

    The variables are checked in a fixed order. The first one that is unset or
    empty is logged via ``log`` and reported by raising; when all are present a
    single success line is logged.

    :raises ValueError: naming the first missing environment variable.
    """
    required = (
        'TWITTER_CONSUMER_KEY',
        'TWITTER_CONSUMER_SECRET',
        'TWITTER_ACCESS_TOKEN',
        'TWITTER_ACCESS_TOKEN_SECRET',
    )

    for name in required:
        if os.environ.get(name):
            continue  # present and non-empty
        # Stop at the first missing variable: log it, then raise.
        log(at='validate_env', status='missing', var=name)
        raise ValueError("Missing ENV var: {0}".format(name))

    # Every variable was found.
    log(at='validate_env', status='ok')

#validate_env()









    



at=validate_env status=ok



In [15]:

    
#Misc 5: A helper function which indicates whether we have been rate limited or not
#Potential rate limit issues, from http://stackoverflow.com/questions/21308762/avoid-twitter-api-limitation-with-tweepy?rq=1
def test_rate_limit(api, wait=True, buffer=.1):
    """
    Tests whether the rate limit of the last request has been reached.
    :param api: The `tweepy` api instance.
    :param wait: A flag indicating whether to wait for the rate limit reset
                 if the rate limit has been reached.
    :param buffer: A buffer time in seconds that is added on to the waiting
                   time as an extra safety margin.
    :return: True if it is ok to proceed with the next request. False otherwise.
    """
    #Get the number of remaining requests
    remaining = int(api.last_response.getheader('x-rate-limit-remaining'))
    #Check if we have reached the limit
    if remaining == 0:
        limit = int(api.last_response.getheader('x-rate-limit-limit'))
        reset = int(api.last_response.getheader('x-rate-limit-reset'))
        #Parse the UTC time
        reset = datetime.fromtimestamp(reset)
        #Let the user know we have reached the rate limit
        print "0 of {} requests remaining until {}.".format(limit, reset)

        if wait:
            #Determine the delay and sleep
            delay = (reset - datetime.now()).total_seconds() + buffer
            print "Sleeping for {}s...".format(delay)
            sleep(delay)
            #We have waited for the rate limit reset. OK to proceed.
            return True
        else:
            #We have reached the rate limit. The user needs to handle the rate limit manually.
            return False 

    #We have not reached the rate limit
    return True
'''
test_rate_limit(api)
print 'limit' + str(api.last_response.getheader('x-rate-limit-limit'))
print 'remaining' + str(api.last_response.getheader('x-rate-limit-remaining'))
'''









    Out[15]:





True



In [ ]:

    
#Misc 6: Haven't tried. [Source to be attributed]
'''
import urllib
import xml.dom.minidom as minidom

def printTweets(username):
 timeline_xml = urllib.urlopen("http://twitter.com/statuses/user_timeline.json?screen_name="+username)
 doc = minidom.parse(timeline_xml) # we're using the twitter xml format
 tweets = doc.getElementsByTagName("text") # tweet text is in ...
 
 for tweet in tweets:
  print "tweet:",tweet.childNodes[0].data,"\n"

## call the our function
printTweets("theja2t")
'''



In [ ]:

    
#Misc 7: Something to do with pages. [To be investigated]
# Collect up to 16 pages of the authenticating user's timeline, 200 tweets per page.
# Cursor must be given the API method itself (not the result of calling it); the
# keyword arguments are forwarded to that method on every page request.
page_list = []
n = 0
for page in tweepy.Cursor(api.user_timeline, count=200, include_rts=True).pages(16):
    page_list.append(page)
    n = n+1
    print(n)  # running count of pages fetched so far

# Print the text of every status, page by page.
for page in page_list:
    for status in page:
        print(status.text)



In [ ]:

    
#Misc 8: latest status of the authenticating user
# Take only the first item the cursor yields (None when the timeline is empty),
# then look that status up again by id to read its text.
status = next(tweepy.Cursor(api.user_timeline).items(), None)
if status is not None:
    lastid = status.id
    laststatus = api.get_status(lastid).text
#An equivalent for-loop with an immediate break also works; the next() form is from StackOverflow.