This example roughly follows one in 'Mining the Social Web,' by Russell: https://github.com/ptwobrussell/Mining-the-Social-Web-2nd-Edition. In order to follow along, you need to install the twitter package, have a twitter account, and get a consumer key, consumer secret, oauth token, and oauth token secret. You can get the keys at http://apps.twitter.com by building a new app. Follow this handy tutorial: http://iag.me/socialmedia/how-to-create-a-twitter-app-in-8-easy-steps/.
In [ ]:
import twitter
In [ ]:
# Fill these in with your own app credentials from https://apps.twitter.com.
# NOTE(review): the original export had the values redacted, which left the
# lines syntactically incomplete; empty-string placeholders keep the file
# runnable. Better practice: load secrets from environment variables instead
# of hard-coding them in a notebook.
CONSUMER_KEY = ''
CONSUMER_SECRET = ''
OAUTH_TOKEN = ''
OAUTH_TOKEN_SECRET = ''
In [ ]:
# Perform the OAuth handshake and build an authenticated API client.
auth = twitter.oauth.OAuth(
    OAUTH_TOKEN,
    OAUTH_TOKEN_SECRET,
    CONSUMER_KEY,
    CONSUMER_SECRET,
)
twitter_api = twitter.Twitter(auth=auth)
# If no exception was raised above, the client object was created successfully.
print(twitter_api)
Twitter uses Where On Earth identifiers (WOEIDs) for places - see http://woeid.rosselliot.co.nz/ for quick lookups.
In [ ]:
# Where On Earth IDs: 1 is the whole world, 23424977 is the United States.
WORLD_WOE_ID = 1
US_WOE_ID = 23424977

# Fetch the trending topics for each place.
# NOTE(review): the leading underscore in _id appears to be the twitter
# package's convention for a URL parameter — confirm against its docs.
world_trends = twitter_api.trends.place(_id=WORLD_WOE_ID)
us_trends = twitter_api.trends.place(_id=US_WOE_ID)

for response in (world_trends, us_trends):
    print(response)
In [ ]:
# What kind of container did the API wrapper return?
type(us_trends)
In [ ]:
# How many top-level entries came back for the US query?
len(us_trends)
The format above is JSON (JavaScript Object Notation). You can read about the JSON format at http://json.org and some examples of JSON are here: http://json.org/example.html. Quite a bit of what moves around on the web is in JSON format.
In [ ]:
import json
In [ ]:
# Pretty-print the raw world-trends response.
# NOTE(review): indent=True only works because bool is a subclass of int
# (True == 1); an explicit integer such as indent=2 would be clearer.
print(json.dumps(world_trends, indent=True))
In [ ]:
# How many top-level entries for the world query?
len(world_trends)
In [ ]:
# What type is the first entry?
type(world_trends[0])
In [ ]:
world_trends[0].keys() # last three are just a record of your request
In [ ]:
# Peek at the sixth individual trend record in the list.
world_trends[0]['trends'][5]
In [ ]:
# Pretty-print the US response for comparison.
print(json.dumps(us_trends, indent=True))
In [ ]:
# Compare trending topic names between the world and the US.
# Set comprehensions replace the set([...]) anti-idiom (flake8-comprehensions
# C403): same result, no throwaway intermediate list.
world_trends_set = {trend['name'] for trend in world_trends[0]['trends']}
us_trends_set = {trend['name'] for trend in us_trends[0]['trends']}
# Names trending in both places at once.
common_trends = world_trends_set & us_trends_set
print(common_trends)
In [ ]:
# Search recent tweets for the #deeplearning hashtag.
q = '#deeplearning'
# Ask for up to 100 tweets in a single request.
count = 100
search_results = twitter_api.search.tweets(q=q, count=count)
In [ ]:
type(search_results) # surprise! It's a dict
In [ ]:
# Top-level keys of the search response.
search_results.keys()
In [ ]:
# Split the response into the tweets ('statuses') and the search metadata.
statuses = search_results['statuses']
metadata = search_results['search_metadata']
In [ ]:
# What container holds the statuses?
type(statuses)
In [ ]:
# How many tweets did the first request return?
len(statuses)
In [ ]:
# What type is an individual status?
type(statuses[0])
In [ ]:
type(metadata)
In [ ]:
# The metadata includes paging info such as 'next_results' (used below).
metadata.keys()
In [ ]:
# Page through up to 5 additional batches of search results. The Search API
# signals that more pages exist by including a 'next_results' query string
# in the metadata; when that key is absent we are done.
for _ in range(5):
    print("Length of statuses", len(statuses))
    try:
        next_results = metadata['next_results']
    except KeyError:  # no more results when next_results doesn't exist
        break
    # next_results looks like a raw query string, e.g. "?max_id=...&q=...";
    # strip the leading '?' and parse it into keyword arguments.
    kwargs = dict(kv.split('=') for kv in next_results[1:].split("&"))
    print(kwargs)  # to see what's happening
    search_results = twitter_api.search.tweets(**kwargs)
    statuses += search_results['statuses']
    # BUG FIX: refresh metadata from the newest response; the original kept
    # the first response's metadata, so every iteration re-read the same
    # 'next_results' and fetched the same page repeatedly.
    metadata = search_results['search_metadata']
In [ ]:
# show one sample result
t = statuses[0]
print(json.dumps(t, indent=True))
In [ ]:
# URLs embedded in the sample tweet's entities.
t['entities']['urls']
In [ ]:
# Pull out three parallel views of the collected tweets: the raw texts,
# every mentioned screen name, and every hashtag.
status_texts = [s['text'] for s in statuses]
screen_names = [mention['screen_name']
                for s in statuses
                for mention in s['entities']['user_mentions']]
hashtags = [tag['text']
            for s in statuses
            for tag in s['entities']['hashtags']]
# Peek at the first five of each.
for sample in (status_texts, screen_names, hashtags):
    print(json.dumps(sample[:5], indent=True))
In [ ]:
from collections import Counter

# Print the ten most frequent screen names, then the ten most frequent
# hashtags. most_common(10) replaces most_common()[:10]: same output
# (ties keep first-encountered order), but it uses heapq.nlargest instead
# of sorting the entire tally.
for item in [screen_names, hashtags]:
    c = Counter(item)
    print(c.most_common(10))
In [ ]:
# Gather a (retweet_count, original author, text) tuple for every status
# that is itself a retweet — only those carry a 'retweeted_status' entry.
retweets = [
    (s['retweet_count'],
     s['retweeted_status']['user']['screen_name'],
     s['text'])
    for s in statuses if 'retweeted_status' in s
]
# Display from most- to least-retweeted (tuples sort by count first).
sorted(retweets, reverse=True)
In [ ]: