In [1]:
#!/usr/bin/env python
#The above line is not needed here
'''
This script accesses twitter through [tweepy's] Oauth mechanism and reads the authenticating user's data.
Formats it into a markdown file for consumption with jekyll.
Essentially the aim is to mirror the user's tweets in case twitter goes bonkers.
'''
# Imports
import tweepy #to talk to twitter through oauth. 85 contributors as of Nov 2014! https://github.com/tweepy/tweepy
import os #do source the appropriate environ variables (TWITTER_X below) using a source call. See readme.md
# Consumer keys and access tokens used for OAuth are read from environment variables.
# Fail fast with a clear message when one is unset or empty; otherwise None would be
# handed to tweepy and the problem would only surface later as an opaque API error.
_REQUIRED_ENV_VARS = (
    'TWITTER_CONSUMER_KEY',
    'TWITTER_CONSUMER_SECRET',
    'TWITTER_ACCESS_TOKEN',
    'TWITTER_ACCESS_TOKEN_SECRET',
)
_missing_env_vars = [name for name in _REQUIRED_ENV_VARS if not os.environ.get(name)]
if _missing_env_vars:
    raise ValueError("Missing ENV var: {0}".format(', '.join(_missing_env_vars)))

# OAuth process, using the keys and tokens
auth = tweepy.OAuthHandler(consumer_key=os.environ['TWITTER_CONSUMER_KEY'],
                           consumer_secret=os.environ['TWITTER_CONSUMER_SECRET'])
auth.set_access_token(key=os.environ['TWITTER_ACCESS_TOKEN'],
                      secret=os.environ['TWITTER_ACCESS_TOKEN_SECRET'])

# Creation of the actual interface, using authentication
api = tweepy.API(auth_handler=auth, secure=True, retry_count=3) #wait_on_rate_limit

# Look up the authenticating user and show a couple of profile fields as a sanity check.
user = api.me()
print('Name: ' + user.name)
print('Location: ' + user.location)
#print('Friends: ' + str(user.friends_count))
In [2]:
# Fetching my tweets. A single user_timeline request returns at most 200 tweets, so
# asking for count=3200 in one call does not work; page through the timeline with a
# Cursor instead and stop at the ~3200 tweets Twitter makes available.
#http://tweepy.readthedocs.org/en/v2.3.0/api.html?highlight=user_timeline#API.user_timeline
MAX_TIMELINE_TWEETS = 3200  # upper bound on how many tweets to collect
TWEETS_PER_REQUEST = 200    # largest page size a single request may ask for
user_tweets = list(
    tweepy.Cursor(
        api.user_timeline,
        screen_name='theja2t',
        count=TWEETS_PER_REQUEST,
        include_rts=True,
    ).items(MAX_TIMELINE_TWEETS)
)
In [4]:
# Show how many tweets were fetched into user_tweets (bare expression => cell output).
len(user_tweets)
Out[4]:
In [31]:
#Formatting for Jekyll
# Writes a Jekyll post: YAML front matter, a short intro, then every tweet in
# user_tweets grouped under a "Month Year" heading that is emitted whenever the
# month of the current tweet differs from the previous heading.
# NOTE: this cell keeps the notebook's Python 2 string handling (str written to a
# file opened in binary mode).
import datetime  # datetime.date.today() below needs the module; it was never imported
import time

# `with` guarantees the file is closed even if one of the writes raises.
with open('../../_posts/2014-11-16-Theja-the-sparse-tweeter.md', 'wb') as f_twitter:  #hardcoded date of the article.
    # Jekyll front matter
    f_twitter.write('---\n')
    f_twitter.write('layout: post\n')
    f_twitter.write('title: "Theja\'s tweeting history using tweepy" \n')
    f_twitter.write('---\n\n')

    # Introductory paragraph, including the generation timestamp
    f_twitter.write('\n\n\nSince extracting my answers from stackexchange (only crossvalidated though), I thought I should do it for twitter as well.')
    f_twitter.write(' I used the tweepy package to get my tweets. ')
    f_twitter.write(' The python code is available as a notebook [here](http://nbviewer.ipython.org/github/thejat/thejat.github.io/blob/master/static/scripts/TwitterDump.ipynb). ')
    f_twitter.write(' This list was generated on '+time.asctime( time.localtime(time.time()) )+'. Hope you find this a good random walk.\n\n\n')

    # Start with a heading for the current month; a new heading is written
    # each time a tweet from a different month is encountered.
    mmyy = datetime.date.today().strftime('%B %Y')
    f_twitter.write('\n------\n')
    f_twitter.write('\n### ' + mmyy + ': ')
    for tweet in user_tweets:
        tweet_mmyy = tweet.created_at.strftime('%B %Y')
        if mmyy != tweet_mmyy:
            mmyy = tweet_mmyy
            f_twitter.write('------\n')
            f_twitter.write('\n### ' + mmyy + ': ')
        f_twitter.write('\n - *Date ' + str(tweet.created_at) + '* > ')
        # raw/endraw stops Jekyll's Liquid engine from interpreting braces in tweet text
        f_twitter.write('{% raw %}' + tweet.text.encode('utf-8')+'\n{% endraw%}\n\n')
In [ ]:
'''
Below this is only for my own reference. Akin to backup slides :)
'''
In [36]:
#Misc 1: home_timeline to get my and my friends' data. If I only want my own data, I should use user_timeline()
#http://tweepy.readthedocs.org/en/v2.3.0/api.html?highlight=user_timeline#API.home_timeline
our_tweets = api.home_timeline(count = 5, include_rts = True)
for tweet in our_tweets: # if you want to print from old to new, use reversed() function
    # print(...) with one argument behaves the same on Python 2 and 3 and matches
    # the print(...) calls used in the first cell.
    print(tweet.text)
Out[36]:
In [4]:
#Misc 2: Getting the authenticating user's public tweets, friends and mentions
def fetch_public_tweets(api):
    """Return the result of ``api.home_timeline()`` called with default arguments.

    :param api: object exposing a ``home_timeline()`` method (the `tweepy` api instance).
    :return: whatever ``home_timeline()`` returns, unmodified.
    """
    return api.home_timeline()
def fetch_friends(api):
    """Return the result of ``api.friends_ids()`` called with default arguments.

    :param api: object exposing a ``friends_ids()`` method (the `tweepy` api instance).
    :return: whatever ``friends_ids()`` returns, unmodified.
    """
    return api.friends_ids()
def fetch_mentions(api):
    """Return the result of ``api.mentions_timeline()`` called with default arguments.

    :param api: object exposing a ``mentions_timeline()`` method (the `tweepy` api instance).
    :return: whatever ``mentions_timeline()`` returns, unmodified.
    """
    return api.mentions_timeline()
# Call each helper once and keep the results in notebook-level variables.
friends = fetch_friends(api)
replies = fetch_mentions(api)
public_tweets = fetch_public_tweets(api)
# Report how many items each call returned.
# NOTE(review): `log` is defined in a later cell (Misc 4); that cell must have been
# run before this one, otherwise this line raises NameError.
log(at='fetched_from_api', friends=len(friends), mentions=len(replies), ptweets=len(public_tweets))
In [25]:
#Misc 3: Show the rate Limits
'''
status = api.rate_limit_status()
dir(status)
'''
In [2]:
# Misc 4: Check if the environment variables for authentication exist
def log(**kwargs):
    """Print the keyword arguments on one line as ``key=value`` pairs.

    Pairs are sorted by key and separated by single spaces, e.g.
    ``log(b=2, a=1)`` prints ``a=1 b=2``.

    :param kwargs: arbitrary keyword arguments to report.
    :return: None
    """
    # from https://github.com/chooper/twitter-repeater
    # print(...) with a single argument works identically on Python 2 and 3 and
    # matches the print(...) calls used in the first cell.
    print(' '.join("{0}={1}".format(k, v) for k, v in sorted(kwargs.items())))
def validate_env():
    """Check that the four Twitter credential variables are set in the environment.

    The variables are examined in a fixed order. For the first one that is unset
    or empty, a ``missing`` status is logged and ``ValueError`` is raised. If all
    are present, an ``ok`` status is logged.

    :raises ValueError: naming the first missing environment variable.
    """
    required = (
        'TWITTER_CONSUMER_KEY',
        'TWITTER_CONSUMER_SECRET',
        'TWITTER_ACCESS_TOKEN',
        'TWITTER_ACCESS_TOKEN_SECRET',
    )

    for name in required:
        if os.environ.get(name):
            # Present and non-empty: move on to the next variable.
            continue
        log(at='validate_env', status='missing', var=name)
        raise ValueError("Missing ENV var: {0}".format(name))

    # Every variable was found.
    log(at='validate_env', status='ok')
#validate_env()
In [15]:
#Misc 5: A helper function which indicates whether we have been rate limited or not
#Potential rate limit issues, from http://stackoverflow.com/questions/21308762/avoid-twitter-api-limitation-with-tweepy?rq=1
def test_rate_limit(api, wait=True, buffer=.1):
"""
Tests whether the rate limit of the last request has been reached.
:param api: The `tweepy` api instance.
:param wait: A flag indicating whether to wait for the rate limit reset
if the rate limit has been reached.
:param buffer: A buffer time in seconds that is added on to the waiting
time as an extra safety margin.
:return: True if it is ok to proceed with the next request. False otherwise.
"""
#Get the number of remaining requests
remaining = int(api.last_response.getheader('x-rate-limit-remaining'))
#Check if we have reached the limit
if remaining == 0:
limit = int(api.last_response.getheader('x-rate-limit-limit'))
reset = int(api.last_response.getheader('x-rate-limit-reset'))
#Parse the UTC time
reset = datetime.fromtimestamp(reset)
#Let the user know we have reached the rate limit
print "0 of {} requests remaining until {}.".format(limit, reset)
if wait:
#Determine the delay and sleep
delay = (reset - datetime.now()).total_seconds() + buffer
print "Sleeping for {}s...".format(delay)
sleep(delay)
#We have waited for the rate limit reset. OK to proceed.
return True
else:
#We have reached the rate limit. The user needs to handle the rate limit manually.
return False
#We have not reached the rate limit
return True
'''
test_rate_limit(api)
print 'limit' + str(api.last_response.getheader('x-rate-limit-limit'))
print 'remaining' + str(api.last_response.getheader('x-rate-limit-remaining'))
'''
Out[15]:
In [ ]:
#Misc 6: Haven't tried. [Source to be attributed]
'''
import urllib
import xml.dom.minidom as minidom
def printTweets(username):
timeline_xml = urllib.urlopen("http://twitter.com/statuses/user_timeline.json?screen_name="+username)
doc = minidom.parse(timeline_xml) # we're using the twitter xml format
tweets = doc.getElementsByTagName("text") # tweet text is in ...
for tweet in tweets:
print "tweet:",tweet.childNodes[0].data,"\n"
## call the our function
printTweets("theja2t")
'''
In [ ]:
#Misc 7: Something to do with pages. [To be investigated]
# Collect up to 16 pages of (at most) 200 tweets each, then print every tweet's text.
page_list = []  # pages collected so far; must exist before the loop appends to it
n = 0           # running count of pages fetched
# Cursor takes the API *method* plus its keyword arguments and performs the calls
# itself; calling api.user_timeline(...) here would pass it a result list instead.
for page in tweepy.Cursor(api.user_timeline, include_rts=True, count=200).pages(16):
    page_list.append(page)
    n = n + 1
    print(n)
for page in page_list:
    for status in page:
        print(status.text)
In [ ]:
#Misc 8: latest status of the authenticating user
# Only the first item yielded by the cursor is needed, so the loop exits after one pass.
timeline_cursor = tweepy.Cursor(api.user_timeline)
for status in timeline_cursor.items():
    lastid = status.id
    # Look the status up again by id and keep its text.
    refetched_status = api.get_status(lastid)
    laststatus = refetched_status.text
    break
#laststatus1 = next(tweepy.Cursor(api.user_timeline).items(), None) #alternative, without for loop, from StackOverflow