In [1]:
import tweepy as tw
import json
import pandas as pd
from collections import defaultdict
import os
from IPython.display import clear_output
In [2]:
# Path of the JSON file that stores the Twitter API credentials.
TWITTER_CONFIG_FILE = "twitter_config.json"
In [3]:
# First-run setup: when no credentials file exists yet, use the sample config
# as a template, prompt for a value for each of its keys, and save the result.
if not os.path.isfile(TWITTER_CONFIG_FILE):
    with open("twitter_config.sample.json") as fp:
        creds = json.load(fp)
    # Prompt in sorted key order so the sequence of questions is deterministic.
    for k in sorted(creds.keys()):
        creds[k] = raw_input("Enter %s:\t" % k)
    # The credentials are intentionally NOT printed: echoing secrets into the
    # cell output risks leaving them in the saved notebook if a later step
    # fails before the output is cleared.
    # JSON is text, so open the file in plain write mode rather than "wb+".
    with open(TWITTER_CONFIG_FILE, "w") as fp:
        json.dump(creds, fp, indent=4, sort_keys=True)
    # Wipe the prompts (which echo the typed secrets) from the cell output.
    clear_output()
    print("Printed credentials to file %s" % TWITTER_CONFIG_FILE)
In [4]:
# Load the stored credentials from the configured path (the same constant the
# setup cell checks and writes) instead of repeating the file name.
with open(TWITTER_CONFIG_FILE) as fp:
    creds = json.load(fp)
# Show only the key names as a sanity check; the secret values are not printed.
print(creds.keys())
In [5]:
# Build the OAuth handler from the consumer key/secret, attach the access
# token pair, and create the API client used by the cells below.
auth = tw.OAuthHandler(
    creds["consumer_key"],
    creds["consumer_secret"],
)
auth.set_access_token(
    creds["access_token"],
    creds["access_token_secret"],
)
api = tw.API(auth)
print("Tweepy ready for search")
In [6]:
# The search endpoint returns at most 100 tweets per request, so a single call
# with count=1000 cannot yield 1000 results. Page through the results with a
# Cursor (100 per page) and collect up to 1000 statuses into a list.
statuses = list(tw.Cursor(api.search, q="Donald Trump", count=100).items(1000))
In [7]:
def dict2df(data):
    """Convert a mapping of item -> count into a sorted DataFrame.

    Parameters
    ----------
    data : dict-like
        Mapping whose ``items()`` yields ``(item, count)`` pairs.

    Returns
    -------
    pandas.DataFrame
        Two columns, ``item`` and ``counts``, ordered by ``counts`` from
        highest to lowest.
    """
    rows = list(data.items())
    frame = pd.DataFrame(rows, columns=["item", "counts"])
    return frame.sort_values("counts", ascending=False)
def get_entities(statuses):
    """Count hashtags and user mentions across a collection of statuses.

    Parameters
    ----------
    statuses : iterable
        Objects exposing an ``entities`` attribute that supports ``in`` and
        item lookup (presumably tweepy Status objects -- confirm against the
        caller). Each entry under ``"hashtags"`` must have a ``"text"`` key and
        each entry under ``"user_mentions"`` a ``"screen_name"`` key.

    Returns
    -------
    tuple
        ``(hashtags_df, mentions_df)``: the hashtag and mention tallies, each
        converted with ``dict2df``.
    """
    hashtags = defaultdict(int)
    mentions = defaultdict(int)
    for status in statuses:
        entities = status.entities
        # Either entity kind may be absent; count only what is present.
        if "hashtags" in entities:
            for tag in entities["hashtags"]:
                hashtags[tag["text"]] += 1
        if "user_mentions" in entities:
            for user in entities["user_mentions"]:
                mentions[user["screen_name"]] += 1
    return dict2df(hashtags), dict2df(mentions)
In [8]:
# Tally hashtags and user mentions over the fetched statuses.
hashtags, mentions = get_entities(statuses)
In [9]:
# How many statuses the search returned.
len(statuses)
Out[9]:
In [10]:
# Hashtag counts, highest count first.
hashtags
Out[10]:
In [11]:
# User-mention counts, highest count first.
mentions
Out[11]:
In [ ]: